iommu/arm-smmu: Merge of smmu changes from 4.14 to msm-kona

Merge the iommu/smmu, DMA, fast-mapping and lazy-mapping changes
from msm-4.14 into msm-kona.

Change-Id: I08777236f45357b7cf44f5c14c80b78eaa482294
Signed-off-by: Swathi Sridhar <swatsrid@codeaurora.org>
Swathi Sridhar 2018-07-17 15:34:46 -07:00
parent b1dc513c03
commit 4008eb493a
45 changed files with 12079 additions and 463 deletions


@ -156,3 +156,10 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged
subsystem that the buffer is fully accessible at the elevated privilege
level (and ideally inaccessible or at least read-only at the
lesser-privileged levels).

DMA_ATTR_IOMMU_USE_LLC_NWA
--------------------------

DMA_ATTR_IOMMU_USE_LLC_NWA: Overrides the bus attributes so that the mapping
uses the system cache (LLC) with an Inner Non-Cacheable, Outer Cacheable:
Write-Back, Read-Allocate, No Write-Allocate allocation policy.
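
A minimal sketch of how a driver might request this attribute through the
standard DMA API. The helper below is illustrative; only the attribute flag
itself comes from this patch:

	#include <linux/dma-mapping.h>
	#include <linux/gfp.h>

	/* Allocate a buffer whose IOMMU mapping uses the LLC with the
	 * no-write-allocate policy described above (illustrative only).
	 */
	static void *alloc_llc_nwa_buffer(struct device *dev, size_t size,
					  dma_addr_t *dma_handle)
	{
		unsigned long attrs = DMA_ATTR_IOMMU_USE_LLC_NWA;

		return dma_alloc_attrs(dev, size, dma_handle, GFP_KERNEL,
				       attrs);
	}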


@ -17,12 +17,16 @@ conditions.
"arm,mmu-401"
"arm,mmu-500"
"cavium,smmu-v2"
"qcom,smmu-v2"
"qcom,qsmmu-v500"
depending on the particular implementation and/or the
version of the architecture implemented.
- reg : Base address and size of the SMMU.
- reg-names : For the "qcom,qsmmu-v500" device "tcu-base" is expected.
- #global-interrupts : The number of global interrupts exposed by the
device.
@ -70,6 +74,88 @@ conditions.
property is not valid for SMMUs using stream indexing,
or using stream matching with #iommu-cells = <2>, and
may be ignored if present in such cases.
- attach-impl-defs : global registers to program at device attach
time. This should be a list of 2-tuples of the format:
<offset reg_value>.
- qcom,fatal-asf : Enable BUG_ON for address size faults. Some hardware
requires special fixups to recover from address size
faults. Rather than applying the fixups, just BUG, since
address size faults are due to a fundamental programming
error from which we do not care to recover anyway.
- qcom,tz-device-id : A string indicating the device ID for this SMMU known
to TZ. See msm_tz_smmu.c for a full list of mappings.
- qcom,skip-init : Disable resetting configuration for all context banks
during device reset. This is useful for targets where
some context banks are dedicated to other execution
environments outside of Linux and those other EEs are
programming their own stream match tables, SCTLR, etc.
Without setting this option we will trample on their
configuration.
- qcom,dynamic : Allow dynamic domains to be attached. This is only
useful if the upstream hardware is capable of switching
between multiple domains within a single context bank.
- qcom,use-3-lvl-tables:
Some hardware configurations may not be optimized for using
a four level page table configuration. Set to use a three
level page table instead.
- qcom,no-asid-retention:
Some hardware may lose the internal state associated with an
ASID across retention. Cache invalidation operations keyed by
ASID must not be used on such hardware.
- qcom,actlr:
An array of <sid mask actlr-setting> tuples.
Any SID X for which (X & ~mask) == sid will be programmed with the
given actlr-setting. For example, <0x800 0x3ff value> applies the
setting to SIDs 0x800 through 0xbff.
- qcom,enable-static-cb : Enables the use of a pre-defined static context bank
allocation programmed by TZ. Global registers, including the SMR
and S2CR registers, are configured by TZ before the kernel comes up,
and this programming is not altered for the lifetime of the system.
These registers are read at run time to identify the CB allocated
to a particular SID. SID masking is not supported, since the client
SID is compared directly with the ID bits of the SMR registers.
- qcom,disable-atos:
Some hardware may not have full support for atos debugging
in tandem with other features like power collapse.
- qcom,deferred-regulator-disable-delay : The time delay, in ms, for deferred
regulator disable. On an unmap call the regulator is enabled and
then disabled, which may introduce additional delay. For clients
who do not detach, it is not possible to keep the regulator vote
while the SMMU is attached. Type is <u32>.
- clocks : List of clocks to be used during SMMU register access. See
Documentation/devicetree/bindings/clock/clock-bindings.txt
for information about the format. For each clock specified
here, there must be a corresponding entry in clock-names
(see below).
- clock-names : List of clock names corresponding to the clocks specified in
the "clocks" property (above). See
Documentation/devicetree/bindings/clock/clock-bindings.txt
for more info.
- (%s)-supply : Phandle of the regulator that should be powered on during
SMMU register access. (%s) is a string from the
qcom,regulator-names property.
- qcom,regulator-names :
List of strings to use with the (%s)-supply property.
- qcom,msm-bus,name
- qcom,msm-bus,num-cases
- qcom,msm-bus,num-paths
- qcom,msm-bus,vectors-KBps
: Refer to devicetree/bindings/arm/msm/msm_bus.txt
** Deprecated properties:
@ -122,18 +208,36 @@ conditions.
};
/* ARM MMU-500 with 10-bit stream ID input configuration */
smmu3: iommu {
compatible = "arm,mmu-500", "arm,smmu-v2";
...
#iommu-cells = <1>;
/* always ignore appended 5-bit TBU number */
stream-match-mask = <0x7c00>;
};
bus {
/* bus whose child devices emit one unique 10-bit stream
ID each, but may master through multiple SMMU TBUs */
iommu-map = <0 &smmu3 0 0x400>;
...
};
* Qualcomm MMU-500 TBU Device
The qcom,qsmmu-v500 device implements a number of register regions containing
debug functionality. Each register region maps to a separate TBU in the
ARM MMU-500 implementation.
** TBU required properties:
- compatible : Should be one of:
"qcom,qsmmuv500-tbu"
- reg : Base address and size.
- reg-names : "base" and "status-reg" are expected
"base" is the main TBU register region.
"status-reg" indicates whether hw can process a new request.
- qcom,stream-id-range:
Pair of values describing the smallest supported stream-id
and the size of the entire set.
Example:
smmu {
compatible = "qcom,qsmmu-v500";
tbu@0x1000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x1000 0x1000>,
<0x2000 0x8>;
reg-names = "base",
"status-reg";
qcom,stream-id-range = <0x800 0x400>;
};
};


@ -0,0 +1,27 @@
This document describes the device tree binding for IOMMU test devices.
The iommu-debug framework can optionally make use of some platform devices
for improved standalone testing and other features.
Required properties
===================
- compatible: iommu-debug-test
- iommus: The IOMMU for the test device (see iommu.txt)
Example
=======
iommu_test_device {
compatible = "iommu-debug-test";
/*
* 42 shouldn't be used by anyone on the cpp_fd_smmu. We just
* need _something_ here to get this node recognized by the
* SMMU driver. Our test uses ATOS, which doesn't use SIDs
* anyways, so using a dummy value is ok.
*/
iommus = <&cpp_fd_smmu 42>;
};


@ -793,6 +793,37 @@ config ARCH_HAS_CACHE_LINE_SIZE
source "mm/Kconfig"
config ARM64_DMA_USE_IOMMU
bool "ARM64 DMA iommu integration"
select ARM_HAS_SG_CHAIN
select NEED_SG_DMA_LENGTH
help
Enable use of the IOMMU through the standard DMA APIs.
dma_alloc_coherent() will allocate scatter-gather memory
which is made virtually contiguous via the IOMMU.
Enable this if the system contains IOMMU hardware.
if ARM64_DMA_USE_IOMMU
config ARM64_DMA_IOMMU_ALIGNMENT
int "Maximum PAGE_SIZE order of alignment for DMA IOMMU buffers"
range 4 9
default 9
help
The DMA mapping framework by default aligns all buffers to the
smallest PAGE_SIZE order which is greater than or equal to the
requested buffer size. This works well for buffers up to a few
hundred kilobytes, but for larger buffers it is just a waste of
address space. Drivers which have a relatively small addressing
window (like 64 MiB) might run out of virtual space with just a
few allocations.
With this parameter you can specify the maximum PAGE_SIZE order
for DMA IOMMU buffers. Larger buffers will be aligned only to
this specified order. The order is expressed as a power of two
multiplied by the PAGE_SIZE; for example, with 4 KiB pages the
default of 9 caps buffer alignment at 2 MiB.
endif
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---


@ -347,3 +347,5 @@
#clock-cells = <1>;
};
};
#include "msm-arm-smmu-kona.dtsi"


@ -0,0 +1,270 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2018, The Linux Foundation. All rights reserved.
*/
#include <dt-bindings/interrupt-controller/arm-gic.h>
&soc {
kgsl_smmu: kgsl-smmu@0x3DA0000 {
compatible = "qcom,qsmmu-v500";
reg = <0x3DA0000 0x10000>,
<0x3DC2000 0x20>;
reg-names = "base", "tcu-base";
#iommu-cells = <2>;
qcom,dynamic;
qcom,skip-init;
qcom,use-3-lvl-tables;
#global-interrupts = <2>;
#size-cells = <1>;
#address-cells = <1>;
status = "disabled";
ranges;
interrupts = <GIC_SPI 673 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 674 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 682 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 683 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 684 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 685 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 686 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 687 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 688 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 689 IRQ_TYPE_LEVEL_HIGH>;
gfx_0_tbu: gfx_0_tbu@0x3DC5000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x3DC5000 0x1000>,
<0x3DC2200 0x8>;
reg-names = "base", "status-reg";
qcom,stream-id-range = <0x0 0x400>;
};
gfx_1_tbu: gfx_1_tbu@0x3DC9000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x3DC9000 0x1000>,
<0x3DC2208 0x8>;
reg-names = "base", "status-reg";
qcom,stream-id-range = <0x400 0x400>;
};
};
apps_smmu: apps-smmu@0x15000000 {
compatible = "qcom,qsmmu-v500";
reg = <0x15000000 0x100000>,
<0x15182000 0x20>;
reg-names = "base", "tcu-base";
#iommu-cells = <2>;
qcom,skip-init;
qcom,use-3-lvl-tables;
#global-interrupts = <2>;
#size-cells = <1>;
#address-cells = <1>;
status = "disabled";
ranges;
interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 102 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 104 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 109 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 110 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 181 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 182 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 183 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 185 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 186 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 187 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 188 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 189 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 190 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 192 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 315 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 316 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 317 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 318 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 319 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 320 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 321 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 322 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 323 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 324 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 325 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 326 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 327 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 328 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 329 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 330 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 331 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 332 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 333 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 334 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 340 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 341 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 342 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 343 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 344 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 345 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 395 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 396 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 397 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 398 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 399 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 400 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 401 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 402 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 403 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 404 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 405 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 406 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 407 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 408 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 409 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 418 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 419 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 420 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 421 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 422 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 423 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 424 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 425 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 690 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 691 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 692 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 693 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 694 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 695 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 696 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 697 IRQ_TYPE_LEVEL_HIGH>;
anoc_1_tbu: anoc_1_tbu@0x15185000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x15185000 0x1000>,
<0x15182200 0x8>;
reg-names = "base", "status-reg";
qcom,stream-id-range = <0x0 0x400>;
};
anoc_2_tbu: anoc_2_tbu@0x15189000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x15189000 0x1000>,
<0x15182208 0x8>;
reg-names = "base", "status-reg";
qcom,stream-id-range = <0x400 0x400>;
};
mnoc_hf_0_tbu: mnoc_hf_0_tbu@0x1518D000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x1518D000 0x1000>,
<0x15182210 0x8>;
reg-names = "base", "status-reg";
qcom,stream-id-range = <0x800 0x400>;
};
mnoc_hf_1_tbu: mnoc_hf_1_tbu@0x15191000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x15191000 0x1000>,
<0x15182218 0x8>;
reg-names = "base", "status-reg";
qcom,stream-id-range = <0xc00 0x400>;
};
compute_dsp_1_tbu: compute_dsp_1_tbu@0x15195000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x15195000 0x1000>,
<0x15182220 0x8>;
reg-names = "base", "status-reg";
qcom,stream-id-range = <0x1000 0x400>;
};
compute_dsp_0_tbu: compute_dsp_0_tbu@0x15199000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x15199000 0x1000>,
<0x15182228 0x8>;
reg-names = "base", "status-reg";
qcom,stream-id-range = <0x1400 0x400>;
};
adsp_tbu: adsp_tbu@0x1519D000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x1519D000 0x1000>,
<0x15182230 0x8>;
reg-names = "base", "status-reg";
qcom,stream-id-range = <0x1800 0x400>;
};
anoc_1_pcie_tbu: anoc_1_pcie_tbu@0x151A1000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x151A1000 0x1000>,
<0x15182238 0x8>;
reg-names = "base", "status-reg";
qcom,stream-id-range = <0x1c00 0x400>;
};
mnoc_sf_0_tbu: mnoc_sf_0_tbu@0x151A5000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x151A5000 0x1000>,
<0x15182240 0x8>;
reg-names = "base", "status-reg";
qcom,stream-id-range = <0x2000 0x400>;
};
mnoc_sf_1_tbu: mnoc_sf_1_tbu@0x151A9000 {
compatible = "qcom,qsmmuv500-tbu";
reg = <0x151A9000 0x1000>,
<0x15182248 0x8>;
reg-names = "base", "status-reg";
qcom,stream-id-range = <0x2400 0x400>;
};
};
kgsl_iommu_test_device {
compatible = "iommu-debug-test";
iommus = <&kgsl_smmu 0x7 0>;
status = "disabled";
};
kgsl_iommu_coherent_test_device {
compatible = "iommu-debug-test";
iommus = <&kgsl_smmu 0x9 0>;
dma-coherent;
status = "disabled";
};
apps_iommu_test_device {
compatible = "iommu-debug-test";
iommus = <&apps_smmu 0x21 0>;
status = "disabled";
};
apps_iommu_coherent_test_device {
compatible = "iommu-debug-test";
iommus = <&apps_smmu 0x23 0>;
dma-coherent;
status = "disabled";
};
};


@ -317,10 +317,9 @@ CONFIG_DMADEVICES=y
CONFIG_UIO=y
CONFIG_STAGING=y
CONFIG_ASHMEM=y
CONFIG_ION=y
CONFIG_HWSPINLOCK=y
CONFIG_HWSPINLOCK_QCOM=y
CONFIG_ARM_SMMU=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_QCOM_SMEM=y
CONFIG_DEVFREQ_GOV_PASSIVE=y
CONFIG_IIO=y


@ -324,10 +324,9 @@ CONFIG_DMADEVICES=y
CONFIG_UIO=y
CONFIG_STAGING=y
CONFIG_ASHMEM=y
CONFIG_ION=y
CONFIG_HWSPINLOCK=y
CONFIG_HWSPINLOCK_QCOM=y
CONFIG_ARM_SMMU=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_QCOM_SMEM=y
CONFIG_DEVFREQ_GOV_PASSIVE=y
CONFIG_IIO=y


@ -101,6 +101,15 @@ static inline void flush_cache_range(struct vm_area_struct *vma,
extern void __dma_map_area(const void *, size_t, int);
extern void __dma_unmap_area(const void *, size_t, int);
extern void __dma_flush_area(const void *, size_t);
extern void __dma_inv_area(const void *start, size_t size);
extern void __dma_clean_area(const void *start, size_t size);
#define dmac_flush_range(start, end) \
__dma_flush_area(start, (void *)(end) - (void *)(start))
#define dmac_inv_range(start, end) \
__dma_inv_area(start, (void *)(end) - (void *)(start))
#define dmac_clean_range(start, end) \
__dma_clean_area(start, (void *)(end) - (void *)(start))
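
/*
 * Illustrative usage sketch (the function and its arguments are
 * hypothetical, not part of this header): clean a CPU-written buffer
 * before a non-coherent device reads it.  dmac_inv_range() and
 * dmac_flush_range() take the same (start, end) virtual address pair.
 */
static inline void example_clean_buf_for_device(void *buf, size_t len)
{
	dmac_clean_range(buf, (char *)buf + len);
}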
/*
* Copy user data from/to a page which is mapped into a different


@ -24,9 +24,18 @@ struct dev_archdata {
const struct dma_map_ops *dev_dma_ops;
#endif
bool dma_coherent;
#ifdef CONFIG_ARM64_DMA_USE_IOMMU
struct dma_iommu_mapping *mapping;
#endif
};
struct pdev_archdata {
};
#ifdef CONFIG_ARM64_DMA_USE_IOMMU
#define to_dma_iommu_mapping(dev) ((dev)->archdata.mapping)
#else
#define to_dma_iommu_mapping(dev) NULL
#endif
#endif


@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2013,2017-2018 The Linux Foundation. All rights reserved.
*/
#ifndef _ASM_DMA_CONTIGUOUS_H
#define _ASM_DMA_CONTIGUOUS_H
#ifdef __KERNEL__
#ifdef CONFIG_DMA_CMA
#include <linux/types.h>
void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size);
#endif
#endif
#endif


@ -0,0 +1,68 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ASMARM_DMA_IOMMU_H
#define ASMARM_DMA_IOMMU_H
#ifdef __KERNEL__
#include <linux/err.h>
#include <linux/mm_types.h>
#include <linux/scatterlist.h>
#include <linux/dma-debug.h>
#include <linux/kref.h>
#include <linux/dma-mapping-fast.h>
struct dma_iommu_mapping {
/* iommu specific data */
struct iommu_domain *domain;
bool init;
struct kref kref;
const struct dma_map_ops *ops;
/* Protects bitmap */
spinlock_t lock;
void *bitmap;
size_t bits;
dma_addr_t base;
u32 min_iova_align;
struct page *guard_page;
struct dma_fast_smmu_mapping *fast;
};
#ifdef CONFIG_ARM64_DMA_USE_IOMMU
struct dma_iommu_mapping *
arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size);
void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping);
int arm_iommu_attach_device(struct device *dev,
struct dma_iommu_mapping *mapping);
void arm_iommu_detach_device(struct device *dev);
#else /* !CONFIG_ARM64_DMA_USE_IOMMU */
static inline struct dma_iommu_mapping *
arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size)
{
return NULL;
}
static inline void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping)
{
}
static inline int arm_iommu_attach_device(struct device *dev,
struct dma_iommu_mapping *mapping)
{
return -ENODEV;
}
static inline void arm_iommu_detach_device(struct device *dev)
{
}
#endif /* CONFIG_ARM64_DMA_USE_IOMMU */
#endif /* __KERNEL__ */
#endif
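
A minimal driver-side sketch of the API declared above, assuming an
arbitrary 1 GB IOVA window at 0x10000000 and a platform-bus device; the
function name is illustrative:

	#include <linux/err.h>
	#include <linux/platform_device.h>
	#include <linux/sizes.h>
	#include <asm/dma-iommu.h>

	static int example_setup_iommu_mapping(struct device *dev)
	{
		struct dma_iommu_mapping *mapping;
		int ret;

		mapping = arm_iommu_create_mapping(&platform_bus_type,
						   0x10000000, SZ_1G);
		if (IS_ERR_OR_NULL(mapping))
			return mapping ? PTR_ERR(mapping) : -ENODEV;

		/* Route the device's DMA API calls through this mapping. */
		ret = arm_iommu_attach_device(dev, mapping);
		if (ret)
			arm_iommu_release_mapping(mapping);

		return ret;
	}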


@ -157,7 +157,7 @@ ENTRY(__inval_dcache_area)
* - start - virtual start address of region
* - size - size in question
*/
__dma_inv_area:
ENTRY(__dma_inv_area)
add x1, x1, x0
dcache_line_size x2, x3
sub x3, x2, #1
@ -196,7 +196,7 @@ ENTRY(__clean_dcache_area_poc)
* - start - virtual start address of region
* - size - size in question
*/
__dma_clean_area:
ENTRY(__dma_clean_area)
dcache_by_line_op cvac, sy, x0, x1, x2, x3
ret
ENDPIPROC(__clean_dcache_area_poc)

File diff suppressed because it is too large.


@ -32,6 +32,8 @@
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/dma-contiguous.h>
#include <linux/cma.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
@ -66,6 +68,40 @@ static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
struct dma_contig_early_reserve {
phys_addr_t base;
unsigned long size;
};
static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS];
static int dma_mmu_remap_num;
void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
{
if (dma_mmu_remap_num >= ARRAY_SIZE(dma_mmu_remap)) {
pr_err("ARM64: Not enough slots for DMA fixup reserved regions!\n");
return;
}
dma_mmu_remap[dma_mmu_remap_num].base = base;
dma_mmu_remap[dma_mmu_remap_num].size = size;
dma_mmu_remap_num++;
}
static bool dma_overlap(phys_addr_t start, phys_addr_t end)
{
int i;
for (i = 0; i < dma_mmu_remap_num; i++) {
phys_addr_t dma_base = dma_mmu_remap[i].base;
phys_addr_t dma_end = dma_mmu_remap[i].base +
dma_mmu_remap[i].size;
if ((dma_base < end) && (dma_end > start))
return true;
}
return false;
}
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
unsigned long size, pgprot_t vma_prot)
{
@ -199,7 +235,8 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
/* try section mapping first */
if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
(flags & NO_BLOCK_MAPPINGS) == 0 &&
!dma_overlap(phys, phys + next - addr)) {
pmd_set_huge(pmdp, phys, prot);
/*
@ -298,7 +335,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
* For 4K granule only, attempt to put down a 1GB block
*/
if (use_1G_block(addr, next, phys) &&
(flags & NO_BLOCK_MAPPINGS) == 0) {
(flags & NO_BLOCK_MAPPINGS) == 0 &&
!dma_overlap(phys, phys + next - addr)) {
pud_set_huge(pudp, phys, prot);
/*


@ -58,6 +58,43 @@ config IOMMU_IO_PGTABLE_ARMV7S_SELFTEST
If unsure, say N here.
config IOMMU_IO_PGTABLE_FAST
bool "Fast ARMv7/v8 Long Descriptor Format"
select IOMMU_IO_PGTABLE
help
Enable support for a subset of the ARM long descriptor pagetable
format. This allocator achieves fast performance by pre-allocating
and pre-populating page table memory up front, and it only supports
a 32-bit virtual address space.
This implementation is mainly optimized for use cases where the
buffers are small (<= 64K), since it only supports 4K page sizes.
config IOMMU_IO_PGTABLE_FAST_SELFTEST
bool "Fast IO pgtable selftests"
depends on IOMMU_IO_PGTABLE_FAST
help
Enable self-tests for "fast" page table allocator.
This performs a series of page-table consistency checks
during boot.
If unsure, say N here.
config IOMMU_IO_PGTABLE_FAST_PROVE_TLB
bool "Prove correctness of TLB maintenance in the Fast DMA mapper"
depends on IOMMU_IO_PGTABLE_FAST
help
Enables some debug features that help prove correctness of TLB
maintenance routines in the Fast DMA mapper. This option will
slow things down considerably, so should only be used in a debug
configuration. This relies on the ability to set bits in an
invalid page table entry, which is disallowed on some hardware
due to errata. If you're running on such a platform then this
option can only be used with unit tests. It will break real use
cases.
If unsure, say N here.
endmenu
config IOMMU_IOVA
@ -305,6 +342,7 @@ config ARM_SMMU
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
select ARM_DMA_USE_IOMMU if ARM
select ARM64_DMA_USE_IOMMU if ARM64
help
Support for implementations of the ARM System MMU architecture
versions 1 and 2.
@ -325,6 +363,18 @@ config ARM_SMMU_V3
Say Y here if your system includes an IOMMU device implementing
the ARM SMMUv3 architecture.
config QCOM_LAZY_MAPPING
bool "Reference counted iommu-mapping support"
depends on ION
depends on IOMMU_API
help
ION buffers may be shared between several software clients.
Reference counting the mapping may simplify coordination between
these clients, and decrease latency by preventing multiple
map/unmaps of the same region.
If unsure, say N here.
config S390_IOMMU
def_bool y if S390 && PCI
depends on S390 && PCI
@ -372,6 +422,38 @@ config MTK_IOMMU_V1
if unsure, say N here.
menuconfig IOMMU_DEBUG
bool "IOMMU Profiling and Debugging"
help
This option is used to enable profiling and debugging in
the IOMMU framework code. IOMMU profiling and debugging
can be done through the debugfs nodes which this option
makes available.
if IOMMU_DEBUG
config IOMMU_DEBUG_TRACKING
bool "Track key IOMMU events"
select IOMMU_API
help
Enables additional debug tracking in the IOMMU framework code.
Tracking information and tests can be accessed through various
debugfs files.
Say Y here if you need to debug IOMMU issues and are okay with
the performance penalty of the tracking.
config IOMMU_TESTS
bool "Interactive IOMMU performance/functional tests"
select IOMMU_API
help
Enables a suite of IOMMU unit tests. The tests are runnable
through debugfs. Unlike the IOMMU_DEBUG_TRACKING option, the
impact of enabling this option on overall system performance
should be minimal.
endif # IOMMU_DEBUG
config QCOM_IOMMU
# Note: iommu drivers cannot (yet?) be built as modules
bool "Qualcomm IOMMU Support"


@ -3,15 +3,18 @@ obj-$(CONFIG_IOMMU_API) += iommu.o
obj-$(CONFIG_IOMMU_API) += iommu-traces.o
obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
obj-$(CONFIG_QCOM_LAZY_MAPPING) += msm_dma_iommu_mapping.o
obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
obj-$(CONFIG_IOMMU_IOVA) += iova.o
obj-$(CONFIG_IOMMU_IO_PGTABLE_FAST) += io-pgtable-fast.o dma-mapping-fast.o
obj-$(CONFIG_OF_IOMMU) += of_iommu.o
obj-$(CONFIG_IOMMU_DEBUG) += iommu-debug.o
obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
obj-$(CONFIG_ARM_SMMU) += arm-smmu.o arm-smmu-errata.o
obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o


@ -0,0 +1,44 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2018, The Linux Foundation. All rights reserved.
*/
#include <linux/kernel.h>
#include <soc/qcom/secure_buffer.h>
#include <linux/arm-smmu-errata.h>
static struct page *guard_pages[VMID_LAST];
static DEFINE_MUTEX(guard_page_lock);
struct page *arm_smmu_errata_get_guard_page(int vmid)
{
struct page *page;
int ret;
int source_vm = VMID_HLOS;
int dest_vm = vmid;
int dest_perm = PERM_READ | PERM_WRITE | PERM_EXEC;
size_t size = ARM_SMMU_MIN_IOVA_ALIGN;
mutex_lock(&guard_page_lock);
page = guard_pages[vmid];
if (page)
goto out;
page = alloc_pages(GFP_KERNEL, get_order(size));
if (!page)
goto out;
if (vmid != VMID_HLOS) {
ret = hyp_assign_phys(page_to_phys(page), PAGE_ALIGN(size),
&source_vm, 1,
&dest_vm, &dest_perm, 1);
if (ret) {
__free_pages(page, get_order(size));
page = NULL;
}
}
guard_pages[vmid] = page;
out:
mutex_unlock(&guard_page_lock);
return page;
}


@ -37,6 +37,9 @@
#define sCR0_VMID16EN (1 << 31)
#define sCR0_BSU_SHIFT 14
#define sCR0_BSU_MASK 0x3
#define sCR0_SHCFG_SHIFT 22
#define sCR0_SHCFG_MASK 0x3
#define sCR0_SHCFG_NSH 3
/* Auxiliary Configuration register */
#define ARM_SMMU_GR0_sACR 0x10
@ -105,6 +108,8 @@
#define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2))
#define SMR_VALID (1 << 31)
#define SMR_MASK_SHIFT 16
#define SMR_MASK_MASK 0x7FFF
#define SID_MASK 0x7FFF
#define SMR_ID_SHIFT 0
#define ARM_SMMU_GR0_S2CR(n) (0xc00 + ((n) << 2))
@ -113,6 +118,9 @@
#define S2CR_EXIDVALID (1 << 10)
#define S2CR_TYPE_SHIFT 16
#define S2CR_TYPE_MASK 0x3
#define S2CR_SHCFG_SHIFT 8
#define S2CR_SHCFG_MASK 0x3
#define S2CR_SHCFG_NSH 0x3
enum arm_smmu_s2cr_type {
S2CR_TYPE_TRANS,
S2CR_TYPE_BYPASS,
@ -147,6 +155,9 @@ enum arm_smmu_s2cr_privcfg {
#define CBAR_IRPTNDX_SHIFT 24
#define CBAR_IRPTNDX_MASK 0xff
#define ARM_SMMU_GR1_CBFRSYNRA(n) (0x400 + ((n) << 2))
#define CBFRSYNRA_SID_MASK (0xffff)
#define ARM_SMMU_GR1_CBA2R(n) (0x800 + ((n) << 2))
#define CBA2R_RW64_32BIT (0 << 0)
#define CBA2R_RW64_64BIT (1 << 0)
@ -165,20 +176,28 @@ enum arm_smmu_s2cr_privcfg {
#define ARM_SMMU_CB_S1_MAIR1 0x3c
#define ARM_SMMU_CB_PAR 0x50
#define ARM_SMMU_CB_FSR 0x58
#define ARM_SMMU_CB_FSRRESTORE 0x5c
#define ARM_SMMU_CB_FAR 0x60
#define ARM_SMMU_CB_FSYNR0 0x68
#define ARM_SMMU_CB_FSYNR1 0x6c
#define ARM_SMMU_CB_S1_TLBIVA 0x600
#define ARM_SMMU_CB_S1_TLBIASID 0x610
#define ARM_SMMU_CB_S1_TLBIALL 0x618
#define ARM_SMMU_CB_S1_TLBIVAL 0x620
#define ARM_SMMU_CB_S2_TLBIIPAS2 0x630
#define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638
#define ARM_SMMU_CB_TLBSYNC 0x7f0
#define ARM_SMMU_CB_TLBSTATUS 0x7f4
#define TLBSTATUS_SACTIVE (1 << 0)
#define ARM_SMMU_CB_ATS1PR 0x800
#define ARM_SMMU_CB_ATSR 0x8f0
#define SCTLR_SHCFG_SHIFT 22
#define SCTLR_SHCFG_MASK 0x3
#define SCTLR_SHCFG_NSH 0x3
#define SCTLR_S1_ASIDPNE (1 << 12)
#define SCTLR_CFCFG (1 << 7)
#define SCTLR_HUPCF (1 << 8)
#define SCTLR_CFIE (1 << 6)
#define SCTLR_CFRE (1 << 5)
#define SCTLR_E (1 << 4)


@ -2889,15 +2889,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
arm_smmu_device_disable(smmu);
return 0;
}
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
arm_smmu_device_remove(pdev);
}
static const struct of_device_id arm_smmu_of_match[] = {
{ .compatible = "arm,smmu-v3", },
{ },
@ -2911,7 +2905,6 @@ static struct platform_driver arm_smmu_driver = {
},
.probe = arm_smmu_device_probe,
.remove = arm_smmu_device_remove,
.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);

File diff suppressed because it is too large.


@ -31,6 +31,8 @@
#include <linux/pci.h>
#include <linux/scatterlist.h>
#include <linux/vmalloc.h>
#include <linux/arm-smmu-errata.h>
#include <soc/qcom/secure_buffer.h>
#define IOMMU_MAPPING_ERROR 0
@ -55,6 +57,8 @@ struct iommu_dma_cookie {
};
struct list_head msi_page_list;
spinlock_t msi_lock;
u32 min_iova_align;
struct page *guard_page;
};
static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
@ -257,6 +261,28 @@ static int iova_reserve_iommu_regions(struct device *dev,
return ret;
}
static int iommu_dma_arm_smmu_errata_init(struct iommu_domain *domain)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
int vmid = VMID_HLOS;
int min_iova_align = 0;
iommu_domain_get_attr(domain,
DOMAIN_ATTR_QCOM_MMU500_ERRATA_MIN_IOVA_ALIGN,
&min_iova_align);
iommu_domain_get_attr(domain, DOMAIN_ATTR_SECURE_VMID, &vmid);
if (vmid >= VMID_LAST || vmid < 0)
vmid = VMID_HLOS;
if (min_iova_align) {
cookie->min_iova_align = ARM_SMMU_MIN_IOVA_ALIGN;
cookie->guard_page = arm_smmu_errata_get_guard_page(vmid);
if (!cookie->guard_page)
return -ENOMEM;
}
return 0;
}
/**
* iommu_dma_init_domain - Initialise a DMA mapping domain
* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@ -279,6 +305,9 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
return -EINVAL;
if (iommu_dma_arm_smmu_errata_init(domain))
return -ENODEV;
/* Use the smallest supported page size for IOVA granularity */
order = __ffs(domain->pgsize_bitmap);
base_pfn = max_t(unsigned long, 1, base >> order);
@ -332,6 +361,15 @@ int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
if (attrs & DMA_ATTR_PRIVILEGED)
prot |= IOMMU_PRIV;
if (!(attrs & DMA_ATTR_EXEC_MAPPING))
prot |= IOMMU_NOEXEC;
if (attrs & DMA_ATTR_IOMMU_USE_UPSTREAM_HINT)
prot |= IOMMU_USE_UPSTREAM_HINT;
if (attrs & DMA_ATTR_IOMMU_USE_LLC_NWA)
prot |= IOMMU_USE_LLC_NWA;
switch (dir) {
case DMA_BIDIRECTIONAL:
return prot | IOMMU_READ | IOMMU_WRITE;
@ -350,14 +388,22 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
unsigned long shift, iova_len, iova = 0;
dma_addr_t limit;
unsigned long guard_len;
dma_addr_t ret_iova;
if (cookie->type == IOMMU_DMA_MSI_COOKIE) {
cookie->msi_iova += size;
return cookie->msi_iova - size;
}
if (cookie->min_iova_align)
guard_len = ALIGN(size, cookie->min_iova_align) - size;
else
guard_len = 0;
shift = iova_shift(iovad);
iova_len = size >> shift;
iova_len = (size + guard_len) >> shift;
/*
* Freeing non-power-of-two-sized allocations back into the IOVA caches
* will come back to bite us badly, so we have to waste a bit of space
@ -370,22 +416,52 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
if (domain->geometry.force_aperture)
dma_limit = min(dma_limit, domain->geometry.aperture_end);
/*
* Ensure iova is within range specified in iommu_dma_init_domain().
* This also prevents unnecessary work iterating through the entire
* rb_tree.
*/
limit = min_t(dma_addr_t, DMA_BIT_MASK(32) >> shift,
iovad->dma_32bit_pfn);
/* Try to get PCI devices a SAC address */
if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
iova = alloc_iova_fast(iovad, iova_len,
DMA_BIT_MASK(32) >> shift, false);
iova = alloc_iova_fast(iovad, iova_len, limit, false);
if (!iova)
iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
true);
if (!iova) {
limit = min_t(dma_addr_t, dma_limit >> shift,
iovad->dma_32bit_pfn);
return (dma_addr_t)iova << shift;
iova = alloc_iova_fast(iovad, iova_len, limit, true);
}
ret_iova = (dma_addr_t)iova << shift;
if (guard_len &&
iommu_map(domain, ret_iova + size,
page_to_phys(cookie->guard_page),
guard_len, ARM_SMMU_GUARD_PROT)) {
free_iova_fast(iovad, iova, iova_len);
return 0;
}
return ret_iova;
}
static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
static void iommu_dma_free_iova(struct iommu_domain *domain,
struct iommu_dma_cookie *cookie,
dma_addr_t iova, size_t size)
{
struct iova_domain *iovad = &cookie->iovad;
unsigned long guard_len;
if (cookie->min_iova_align) {
guard_len = ALIGN(size, cookie->min_iova_align) - size;
iommu_unmap(domain, iova + size, guard_len);
} else {
guard_len = 0;
}
/* The MSI case is only ever cleaning up its most recent allocation */
if (cookie->type == IOMMU_DMA_MSI_COOKIE)
@ -406,7 +482,7 @@ static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
size = iova_align(iovad, size + iova_off);
WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
iommu_dma_free_iova(cookie, dma_addr, size);
iommu_dma_free_iova(domain, cookie, dma_addr, size);
}
static void __iommu_dma_free_pages(struct page **pages, int count)
@ -571,7 +647,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp,
out_free_sg:
sg_free_table(&sgt);
out_free_iova:
iommu_dma_free_iova(cookie, iova, size);
iommu_dma_free_iova(domain, cookie, iova, size);
out_free_pages:
__iommu_dma_free_pages(pages, count);
return NULL;
@ -620,7 +696,7 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
return IOMMU_MAPPING_ERROR;
if (iommu_map(domain, iova, phys - iova_off, size, prot)) {
iommu_dma_free_iova(cookie, iova, size);
iommu_dma_free_iova(domain, cookie, iova, size);
return IOMMU_MAPPING_ERROR;
}
return iova + iova_off;
@ -785,7 +861,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
return __finalise_sg(dev, sg, nents, iova);
out_free_iova:
iommu_dma_free_iova(cookie, iova, iova_len);
iommu_dma_free_iova(domain, cookie, iova, iova_len);
out_restore_sg:
__invalidate_sg(sg, nents);
return 0;


@ -0,0 +1,979 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
*/
#include <linux/dma-contiguous.h>
#include <linux/dma-mapping.h>
#include <linux/dma-mapping-fast.h>
#include <linux/io-pgtable-fast.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>
#include <asm/dma-iommu.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <soc/qcom/secure_buffer.h>
#include <linux/arm-smmu-errata.h>
/* some redundant definitions... :( TODO: move to io-pgtable-fast.h */
#define FAST_PAGE_SHIFT 12
#define FAST_PAGE_SIZE (1UL << FAST_PAGE_SHIFT)
#define FAST_PAGE_MASK (~(PAGE_SIZE - 1))
#define FAST_PTE_ADDR_MASK ((av8l_fast_iopte)0xfffffffff000)
#define FAST_MAIR_ATTR_IDX_CACHE 1
#define FAST_PTE_ATTRINDX_SHIFT 2
#define FAST_PTE_ATTRINDX_MASK 0x7
#define FAST_PTE_SH_SHIFT 8
#define FAST_PTE_SH_MASK (((av8l_fast_iopte)0x3) << FAST_PTE_SH_SHIFT)
#define FAST_PTE_SH_OS (((av8l_fast_iopte)2) << FAST_PTE_SH_SHIFT)
#define FAST_PTE_SH_IS (((av8l_fast_iopte)3) << FAST_PTE_SH_SHIFT)
static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
bool coherent)
{
if (attrs & DMA_ATTR_STRONGLY_ORDERED)
return pgprot_noncached(prot);
else if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
return pgprot_writecombine(prot);
return prot;
}
static int __get_iommu_pgprot(unsigned long attrs, int prot,
bool coherent)
{
if (!(attrs & DMA_ATTR_EXEC_MAPPING))
prot |= IOMMU_NOEXEC;
if ((attrs & DMA_ATTR_STRONGLY_ORDERED))
prot |= IOMMU_MMIO;
if (coherent)
prot |= IOMMU_CACHE;
return prot;
}
static void fast_dmac_clean_range(struct dma_fast_smmu_mapping *mapping,
void *start, void *end)
{
if (!mapping->is_smmu_pt_coherent)
dmac_clean_range(start, end);
}
static bool __fast_is_pte_coherent(av8l_fast_iopte *ptep)
{
int attr_idx = (*ptep & (FAST_PTE_ATTRINDX_MASK <<
FAST_PTE_ATTRINDX_SHIFT)) >>
FAST_PTE_ATTRINDX_SHIFT;
if ((attr_idx == FAST_MAIR_ATTR_IDX_CACHE) &&
(((*ptep & FAST_PTE_SH_MASK) == FAST_PTE_SH_IS) ||
(*ptep & FAST_PTE_SH_MASK) == FAST_PTE_SH_OS))
return true;
return false;
}
static bool is_dma_coherent(struct device *dev, unsigned long attrs)
{
bool is_coherent;
if (attrs & DMA_ATTR_FORCE_COHERENT)
is_coherent = true;
else if (attrs & DMA_ATTR_FORCE_NON_COHERENT)
is_coherent = false;
else if (is_device_dma_coherent(dev))
is_coherent = true;
else
is_coherent = false;
return is_coherent;
}
/*
* Checks if the allocated range (ending at @end) covered the upcoming
* stale bit. We don't need to know exactly where the range starts since
* we already know where the candidate search range started. If, starting
* from the beginning of the candidate search range, we had to step over
* (or landed directly on top of) the upcoming stale bit, then we return
* true.
*
* Due to wrapping, there are two scenarios we'll need to check: (1) if the
* range [search_start, upcoming_stale] spans 0 (i.e. search_start >
* upcoming_stale), and, (2) if the range: [search_start, upcoming_stale]
* does *not* span 0 (i.e. search_start <= upcoming_stale). And for each
* of those two scenarios we need to handle three cases: (1) the bit was
* found before wrapping or
*/
static bool __bit_covered_stale(unsigned long upcoming_stale,
unsigned long search_start,
unsigned long end)
{
if (search_start > upcoming_stale) {
if (end >= search_start) {
/*
* We started searching above upcoming_stale and we
* didn't wrap, so we couldn't have crossed
* upcoming_stale.
*/
return false;
}
/*
* We wrapped. Did we cross (or land on top of)
* upcoming_stale?
*/
return end >= upcoming_stale;
}
if (search_start <= upcoming_stale) {
if (end >= search_start) {
/*
* We didn't wrap. Did we cross (or land on top
* of) upcoming_stale?
*/
return end >= upcoming_stale;
}
/*
* We wrapped. So we must have crossed upcoming_stale
* (since we started searching below it).
*/
return true;
}
/* we should have covered all logical combinations... */
WARN_ON(1);
return true;
}
static dma_addr_t __fast_smmu_alloc_iova(struct dma_fast_smmu_mapping *mapping,
unsigned long attrs,
size_t size)
{
unsigned long bit, prev_search_start, nbits;
unsigned long align;
unsigned long guard_len;
dma_addr_t iova;
if (mapping->min_iova_align)
guard_len = ALIGN(size, mapping->min_iova_align) - size;
else
guard_len = 0;
nbits = (size + guard_len) >> FAST_PAGE_SHIFT;
align = (1 << get_order(size + guard_len)) - 1;
bit = bitmap_find_next_zero_area(
mapping->bitmap, mapping->num_4k_pages, mapping->next_start,
nbits, align);
if (unlikely(bit > mapping->num_4k_pages)) {
/* try wrapping */
mapping->next_start = 0; /* TODO: SHOULD I REALLY DO THIS?!? */
bit = bitmap_find_next_zero_area(
mapping->bitmap, mapping->num_4k_pages, 0, nbits,
align);
if (unlikely(bit > mapping->num_4k_pages))
return DMA_ERROR_CODE;
}
bitmap_set(mapping->bitmap, bit, nbits);
prev_search_start = mapping->next_start;
mapping->next_start = bit + nbits;
if (unlikely(mapping->next_start >= mapping->num_4k_pages))
mapping->next_start = 0;
/*
* If we just re-allocated a VA whose TLB hasn't been invalidated
* since it was last used and unmapped, we need to invalidate it
* here. We actually invalidate the entire TLB so that we don't
* have to invalidate the TLB again until we wrap back around.
*/
if (mapping->have_stale_tlbs &&
__bit_covered_stale(mapping->upcoming_stale_bit,
prev_search_start,
bit + nbits - 1)) {
bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);
iommu_tlbiall(mapping->domain);
mapping->have_stale_tlbs = false;
av8l_fast_clear_stale_ptes(mapping->pgtbl_pmds, skip_sync);
}
iova = (bit << FAST_PAGE_SHIFT) + mapping->base;
if (guard_len &&
iommu_map(mapping->domain, iova + size,
page_to_phys(mapping->guard_page),
guard_len, ARM_SMMU_GUARD_PROT)) {
bitmap_clear(mapping->bitmap, bit, nbits);
return DMA_ERROR_CODE;
}
return iova;
}
/*
* Checks whether the candidate bit will be allocated sooner than the
* current upcoming stale bit. We can say candidate will be upcoming
* sooner than the current upcoming stale bit if it lies between the
* starting bit of the next search range and the upcoming stale bit
* (allowing for wrap-around).
*
* Stated differently, we're checking the relative ordering of three
* unsigned numbers. So we need to check all 6 (i.e. 3!) permutations,
* namely:
*
* 0 |---A---B---C---| TOP (Case 1)
* 0 |---A---C---B---| TOP (Case 2)
* 0 |---B---A---C---| TOP (Case 3)
* 0 |---B---C---A---| TOP (Case 4)
* 0 |---C---A---B---| TOP (Case 5)
* 0 |---C---B---A---| TOP (Case 6)
*
* Note that since we're allowing numbers to wrap, the following three
* scenarios are all equivalent for Case 1:
*
* 0 |---A---B---C---| TOP
* 0 |---C---A---B---| TOP (C has wrapped. This is Case 5.)
* 0 |---B---C---A---| TOP (C and B have wrapped. This is Case 4.)
*
* In any of these cases, if we start searching from A, we will find B
* before we find C.
*
* We can also find two equivalent cases for Case 2:
*
* 0 |---A---C---B---| TOP
* 0 |---B---A---C---| TOP (B has wrapped. This is Case 3.)
* 0 |---C---B---A---| TOP (B and C have wrapped. This is Case 6.)
*
* In any of these cases, if we start searching from A, we will find C
* before we find B.
*/
static bool __bit_is_sooner(unsigned long candidate,
struct dma_fast_smmu_mapping *mapping)
{
unsigned long A = mapping->next_start;
unsigned long B = candidate;
unsigned long C = mapping->upcoming_stale_bit;
if ((A < B && B < C) || /* Case 1 */
(C < A && A < B) || /* Case 5 */
(B < C && C < A)) /* Case 4 */
return true;
if ((A < C && C < B) || /* Case 2 */
(B < A && A < C) || /* Case 3 */
(C < B && B < A)) /* Case 6 */
return false;
/*
* For simplicity, we've been ignoring the possibility of any of
* our three numbers being equal. Handle those cases here (they
* shouldn't happen very often, (I think?)).
*/
/*
* If candidate is the next bit to be searched then it's definitely
* sooner.
*/
if (A == B)
return true;
/*
* If candidate is the next upcoming stale bit we'll return false
* to avoid doing `upcoming = candidate' in the caller (which would
* be useless since they're already equal)
*/
if (B == C)
return false;
/*
* If next start is the upcoming stale bit then candidate can't
* possibly be sooner. The "soonest" bit is already selected.
*/
if (A == C)
return false;
/* We should have covered all logical combinations. */
WARN(1, "Well, that's awkward. A=%ld, B=%ld, C=%ld\n", A, B, C);
return true;
}
static void __fast_smmu_free_iova(struct dma_fast_smmu_mapping *mapping,
dma_addr_t iova, size_t size)
{
unsigned long start_bit = (iova - mapping->base) >> FAST_PAGE_SHIFT;
unsigned long nbits;
unsigned long guard_len;
if (mapping->min_iova_align) {
guard_len = ALIGN(size, mapping->min_iova_align) - size;
iommu_unmap(mapping->domain, iova + size, guard_len);
} else {
guard_len = 0;
}
nbits = (size + guard_len) >> FAST_PAGE_SHIFT;
/*
* We don't invalidate TLBs on unmap. We invalidate TLBs on map
* when we're about to re-allocate a VA that was previously
* unmapped but hasn't yet been invalidated. So we need to keep
* track of which bit is the closest to being re-allocated here.
*/
if (__bit_is_sooner(start_bit, mapping))
mapping->upcoming_stale_bit = start_bit;
bitmap_clear(mapping->bitmap, start_bit, nbits);
mapping->have_stale_tlbs = true;
}
static void __fast_dma_page_cpu_to_dev(struct page *page, unsigned long off,
size_t size, enum dma_data_direction dir)
{
__dma_map_area(page_address(page) + off, size, dir);
}
static void __fast_dma_page_dev_to_cpu(struct page *page, unsigned long off,
size_t size, enum dma_data_direction dir)
{
__dma_unmap_area(page_address(page) + off, size, dir);
/* TODO: WHAT IS THIS? */
/*
* Mark the D-cache clean for this page to avoid extra flushing.
*/
if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
set_bit(PG_dcache_clean, &page->flags);
}
static int __fast_dma_direction_to_prot(enum dma_data_direction dir)
{
switch (dir) {
case DMA_BIDIRECTIONAL:
return IOMMU_READ | IOMMU_WRITE;
case DMA_TO_DEVICE:
return IOMMU_READ;
case DMA_FROM_DEVICE:
return IOMMU_WRITE;
default:
return 0;
}
}
static dma_addr_t fast_smmu_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
unsigned long attrs)
{
struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
dma_addr_t iova;
unsigned long flags;
av8l_fast_iopte *pmd;
phys_addr_t phys_plus_off = page_to_phys(page) + offset;
phys_addr_t phys_to_map = round_down(phys_plus_off, FAST_PAGE_SIZE);
unsigned long offset_from_phys_to_map = phys_plus_off & ~FAST_PAGE_MASK;
size_t len = ALIGN(size + offset_from_phys_to_map, FAST_PAGE_SIZE);
int nptes = len >> FAST_PAGE_SHIFT;
bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);
int prot = __fast_dma_direction_to_prot(dir);
bool is_coherent = is_dma_coherent(dev, attrs);
prot = __get_iommu_pgprot(attrs, prot, is_coherent);
if (!skip_sync && !is_coherent)
__fast_dma_page_cpu_to_dev(phys_to_page(phys_to_map),
offset_from_phys_to_map, size, dir);
spin_lock_irqsave(&mapping->lock, flags);
iova = __fast_smmu_alloc_iova(mapping, attrs, len);
if (unlikely(iova == DMA_ERROR_CODE))
goto fail;
pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);
if (unlikely(av8l_fast_map_public(pmd, phys_to_map, len, prot)))
goto fail_free_iova;
fast_dmac_clean_range(mapping, pmd, pmd + nptes);
spin_unlock_irqrestore(&mapping->lock, flags);
return iova + offset_from_phys_to_map;
fail_free_iova:
__fast_smmu_free_iova(mapping, iova, size);
fail:
spin_unlock_irqrestore(&mapping->lock, flags);
return DMA_ERROR_CODE;
}
static void fast_smmu_unmap_page(struct device *dev, dma_addr_t iova,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
unsigned long flags;
av8l_fast_iopte *pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);
unsigned long offset = iova & ~FAST_PAGE_MASK;
size_t len = ALIGN(size + offset, FAST_PAGE_SIZE);
int nptes = len >> FAST_PAGE_SHIFT;
struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK));
bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);
bool is_coherent = is_dma_coherent(dev, attrs);
if (!skip_sync && !is_coherent)
__fast_dma_page_dev_to_cpu(page, offset, size, dir);
spin_lock_irqsave(&mapping->lock, flags);
av8l_fast_unmap_public(pmd, len);
fast_dmac_clean_range(mapping, pmd, pmd + nptes);
__fast_smmu_free_iova(mapping, iova - offset, len);
spin_unlock_irqrestore(&mapping->lock, flags);
}
static void fast_smmu_sync_single_for_cpu(struct device *dev,
dma_addr_t iova, size_t size, enum dma_data_direction dir)
{
struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
av8l_fast_iopte *pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);
unsigned long offset = iova & ~FAST_PAGE_MASK;
struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK));
if (!__fast_is_pte_coherent(pmd))
__fast_dma_page_dev_to_cpu(page, offset, size, dir);
}
static void fast_smmu_sync_single_for_device(struct device *dev,
dma_addr_t iova, size_t size, enum dma_data_direction dir)
{
struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
av8l_fast_iopte *pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);
unsigned long offset = iova & ~FAST_PAGE_MASK;
struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK));
if (!__fast_is_pte_coherent(pmd))
__fast_dma_page_cpu_to_dev(page, offset, size, dir);
}
static int fast_smmu_map_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
unsigned long attrs)
{
return -EINVAL;
}
static void fast_smmu_unmap_sg(struct device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction dir,
unsigned long attrs)
{
WARN_ON_ONCE(1);
}
static void fast_smmu_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sg, int nents, enum dma_data_direction dir)
{
WARN_ON_ONCE(1);
}
static void fast_smmu_sync_sg_for_device(struct device *dev,
struct scatterlist *sg, int nents, enum dma_data_direction dir)
{
WARN_ON_ONCE(1);
}
static void __fast_smmu_free_pages(struct page **pages, int count)
{
int i;
for (i = 0; i < count; i++)
__free_page(pages[i]);
kvfree(pages);
}
static struct page **__fast_smmu_alloc_pages(unsigned int count, gfp_t gfp)
{
struct page **pages;
unsigned int i = 0, array_size = count * sizeof(*pages);
if (array_size <= PAGE_SIZE)
pages = kzalloc(array_size, GFP_KERNEL);
else
pages = vzalloc(array_size);
if (!pages)
return NULL;
/* IOMMU can map any pages, so himem can also be used here */
gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
for (i = 0; i < count; ++i) {
struct page *page = alloc_page(gfp);
if (!page) {
__fast_smmu_free_pages(pages, i);
return NULL;
}
pages[i] = page;
}
return pages;
}
static void *fast_smmu_alloc(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t gfp,
unsigned long attrs)
{
struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
struct sg_table sgt;
dma_addr_t dma_addr, iova_iter;
void *addr;
av8l_fast_iopte *ptep;
unsigned long flags;
struct sg_mapping_iter miter;
unsigned int count = ALIGN(size, SZ_4K) >> PAGE_SHIFT;
int prot = IOMMU_READ | IOMMU_WRITE; /* TODO: extract from attrs */
bool is_coherent = is_dma_coherent(dev, attrs);
pgprot_t remap_prot = __get_dma_pgprot(attrs, PAGE_KERNEL, is_coherent);
struct page **pages;
prot = __get_iommu_pgprot(attrs, prot, is_coherent);
*handle = DMA_ERROR_CODE;
pages = __fast_smmu_alloc_pages(count, gfp);
if (!pages) {
dev_err(dev, "no pages\n");
return NULL;
}
size = ALIGN(size, SZ_4K);
if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, gfp)) {
dev_err(dev, "no sg tablen\n");
goto out_free_pages;
}
if (!is_coherent) {
/*
* The CPU-centric flushing implied by SG_MITER_TO_SG isn't
* sufficient here, so skip it by using the "wrong" direction.
*/
sg_miter_start(&miter, sgt.sgl, sgt.orig_nents,
SG_MITER_FROM_SG);
while (sg_miter_next(&miter))
__dma_flush_area(miter.addr, miter.length);
sg_miter_stop(&miter);
}
spin_lock_irqsave(&mapping->lock, flags);
dma_addr = __fast_smmu_alloc_iova(mapping, attrs, size);
if (dma_addr == DMA_ERROR_CODE) {
dev_err(dev, "no iova\n");
spin_unlock_irqrestore(&mapping->lock, flags);
goto out_free_sg;
}
iova_iter = dma_addr;
sg_miter_start(&miter, sgt.sgl, sgt.orig_nents,
SG_MITER_FROM_SG | SG_MITER_ATOMIC);
while (sg_miter_next(&miter)) {
int nptes = miter.length >> FAST_PAGE_SHIFT;
ptep = iopte_pmd_offset(mapping->pgtbl_pmds, iova_iter);
if (unlikely(av8l_fast_map_public(
ptep, page_to_phys(miter.page),
miter.length, prot))) {
dev_err(dev, "no map public\n");
/* TODO: unwind previously successful mappings */
goto out_free_iova;
}
fast_dmac_clean_range(mapping, ptep, ptep + nptes);
iova_iter += miter.length;
}
sg_miter_stop(&miter);
spin_unlock_irqrestore(&mapping->lock, flags);
addr = dma_common_pages_remap(pages, size, VM_USERMAP, remap_prot,
__builtin_return_address(0));
if (!addr) {
dev_err(dev, "no common pages\n");
goto out_unmap;
}
*handle = dma_addr;
sg_free_table(&sgt);
return addr;
out_unmap:
/* need to take the lock again for page tables and iova */
spin_lock_irqsave(&mapping->lock, flags);
ptep = iopte_pmd_offset(mapping->pgtbl_pmds, dma_addr);
av8l_fast_unmap_public(ptep, size);
fast_dmac_clean_range(mapping, ptep, ptep + count);
out_free_iova:
__fast_smmu_free_iova(mapping, dma_addr, size);
spin_unlock_irqrestore(&mapping->lock, flags);
out_free_sg:
sg_free_table(&sgt);
out_free_pages:
__fast_smmu_free_pages(pages, count);
return NULL;
}
static void fast_smmu_free(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
unsigned long attrs)
{
struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
struct vm_struct *area;
struct page **pages;
size_t count = ALIGN(size, SZ_4K) >> FAST_PAGE_SHIFT;
av8l_fast_iopte *ptep;
unsigned long flags;
size = ALIGN(size, SZ_4K);
area = find_vm_area(vaddr);
if (WARN_ON_ONCE(!area))
return;
pages = area->pages;
dma_common_free_remap(vaddr, size, VM_USERMAP, false);
ptep = iopte_pmd_offset(mapping->pgtbl_pmds, dma_handle);
spin_lock_irqsave(&mapping->lock, flags);
av8l_fast_unmap_public(ptep, size);
fast_dmac_clean_range(mapping, ptep, ptep + count);
__fast_smmu_free_iova(mapping, dma_handle, size);
spin_unlock_irqrestore(&mapping->lock, flags);
__fast_smmu_free_pages(pages, count);
}
static int fast_smmu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr,
size_t size, unsigned long attrs)
{
struct vm_struct *area;
unsigned long uaddr = vma->vm_start;
struct page **pages;
int i, nr_pages, ret = 0;
bool coherent = is_dma_coherent(dev, attrs);
vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
coherent);
area = find_vm_area(cpu_addr);
if (!area)
return -EINVAL;
pages = area->pages;
nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
for (i = vma->vm_pgoff; i < nr_pages && uaddr < vma->vm_end; i++) {
ret = vm_insert_page(vma, uaddr, pages[i]);
if (ret)
break;
uaddr += PAGE_SIZE;
}
return ret;
}
static int fast_smmu_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr,
size_t size, unsigned long attrs)
{
unsigned int n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct vm_struct *area;
area = find_vm_area(cpu_addr);
if (!area || !area->pages)
return -EINVAL;
return sg_alloc_table_from_pages(sgt, area->pages, n_pages, 0, size,
GFP_KERNEL);
}
static dma_addr_t fast_smmu_dma_map_resource(
struct device *dev, phys_addr_t phys_addr,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
size_t offset = phys_addr & ~FAST_PAGE_MASK;
size_t len = round_up(size + offset, FAST_PAGE_SIZE);
dma_addr_t dma_addr;
int prot;
unsigned long flags;
spin_lock_irqsave(&mapping->lock, flags);
dma_addr = __fast_smmu_alloc_iova(mapping, attrs, len);
spin_unlock_irqrestore(&mapping->lock, flags);
if (dma_addr == DMA_ERROR_CODE)
return dma_addr;
prot = __fast_dma_direction_to_prot(dir);
prot |= IOMMU_MMIO;
if (iommu_map(mapping->domain, dma_addr, phys_addr - offset,
len, prot)) {
spin_lock_irqsave(&mapping->lock, flags);
__fast_smmu_free_iova(mapping, dma_addr, len);
spin_unlock_irqrestore(&mapping->lock, flags);
return DMA_ERROR_CODE;
}
return dma_addr + offset;
}
static void fast_smmu_dma_unmap_resource(
struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
size_t offset = addr & ~FAST_PAGE_MASK;
size_t len = round_up(size + offset, FAST_PAGE_SIZE);
unsigned long flags;
iommu_unmap(mapping->domain, addr - offset, len);
spin_lock_irqsave(&mapping->lock, flags);
__fast_smmu_free_iova(mapping, addr - offset, len);
spin_unlock_irqrestore(&mapping->lock, flags);
}
static int fast_smmu_mapping_error(struct device *dev,
dma_addr_t dma_addr)
{
return dma_addr == DMA_ERROR_CODE;
}
static void __fast_smmu_mapped_over_stale(struct dma_fast_smmu_mapping *fast,
void *data)
{
av8l_fast_iopte *ptep = data;
dma_addr_t iova;
unsigned long bitmap_idx;
bitmap_idx = (unsigned long)(ptep - fast->pgtbl_pmds);
iova = bitmap_idx << FAST_PAGE_SHIFT;
dev_err(fast->dev, "Mapped over stale tlb at %pa\n", &iova);
dev_err(fast->dev, "bitmap (failure at idx %lu):\n", bitmap_idx);
dev_err(fast->dev, "ptep: %p pmds: %p diff: %lu\n", ptep,
fast->pgtbl_pmds, bitmap_idx);
print_hex_dump(KERN_ERR, "bmap: ", DUMP_PREFIX_ADDRESS,
32, 8, fast->bitmap, fast->bitmap_size, false);
}
static int fast_smmu_notify(struct notifier_block *self,
unsigned long action, void *data)
{
struct dma_fast_smmu_mapping *fast = container_of(
self, struct dma_fast_smmu_mapping, notifier);
switch (action) {
case MAPPED_OVER_STALE_TLB:
__fast_smmu_mapped_over_stale(fast, data);
return NOTIFY_OK;
default:
WARN(1, "Unhandled notifier action");
return NOTIFY_DONE;
}
}
static const struct dma_map_ops fast_smmu_dma_ops = {
.alloc = fast_smmu_alloc,
.free = fast_smmu_free,
.mmap = fast_smmu_mmap_attrs,
.get_sgtable = fast_smmu_get_sgtable,
.map_page = fast_smmu_map_page,
.unmap_page = fast_smmu_unmap_page,
.sync_single_for_cpu = fast_smmu_sync_single_for_cpu,
.sync_single_for_device = fast_smmu_sync_single_for_device,
.map_sg = fast_smmu_map_sg,
.unmap_sg = fast_smmu_unmap_sg,
.sync_sg_for_cpu = fast_smmu_sync_sg_for_cpu,
.sync_sg_for_device = fast_smmu_sync_sg_for_device,
.map_resource = fast_smmu_dma_map_resource,
.unmap_resource = fast_smmu_dma_unmap_resource,
.mapping_error = fast_smmu_mapping_error,
};
/**
* __fast_smmu_create_mapping_sized
* @base: bottom of the VA range
* @size: size of the VA range in bytes
*
* Creates a mapping structure which holds information about used/unused IO
* address ranges, which is required to perform mapping with IOMMU aware
* functions. The only VA range supported is [0, 4GB).
*
* The client device needs to be attached to the mapping with the
* fast_smmu_attach_device() function.
*/
static struct dma_fast_smmu_mapping *__fast_smmu_create_mapping_sized(
dma_addr_t base, u64 size)
{
struct dma_fast_smmu_mapping *fast;
fast = kzalloc(sizeof(struct dma_fast_smmu_mapping), GFP_KERNEL);
if (!fast)
goto err;
fast->base = base;
fast->size = size;
fast->num_4k_pages = size >> FAST_PAGE_SHIFT;
fast->bitmap_size = BITS_TO_LONGS(fast->num_4k_pages) * sizeof(long);
fast->bitmap = kzalloc(fast->bitmap_size, GFP_KERNEL | __GFP_NOWARN |
__GFP_NORETRY);
if (!fast->bitmap)
fast->bitmap = vzalloc(fast->bitmap_size);
if (!fast->bitmap)
goto err2;
spin_lock_init(&fast->lock);
return fast;
err2:
kfree(fast);
err:
return ERR_PTR(-ENOMEM);
}
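/*
 * Sizing note (illustrative, not part of the patch): for the largest
 * supported range of 4 GB, num_4k_pages = (4 GB >> 12) = 1,048,576, so the
 * bitmap is BITS_TO_LONGS(1048576) * sizeof(long) = 128 KB on a 64-bit
 * kernel. That allocation can fail under fragmentation, which is why the
 * kzalloc() above uses __GFP_NORETRY and keeps a vzalloc() fallback.
 */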
/*
* Based off of similar code from dma-iommu.c, but modified to use a different
* iova allocator
*/
static void fast_smmu_reserve_pci_windows(struct device *dev,
struct dma_fast_smmu_mapping *mapping)
{
struct pci_host_bridge *bridge;
struct resource_entry *window;
phys_addr_t start, end;
struct pci_dev *pci_dev;
unsigned long flags;
if (!dev_is_pci(dev))
return;
pci_dev = to_pci_dev(dev);
bridge = pci_find_host_bridge(pci_dev->bus);
spin_lock_irqsave(&mapping->lock, flags);
resource_list_for_each_entry(window, &bridge->windows) {
if (resource_type(window->res) != IORESOURCE_MEM &&
resource_type(window->res) != IORESOURCE_IO)
continue;
start = round_down(window->res->start - window->offset,
FAST_PAGE_SIZE);
end = round_up(window->res->end - window->offset,
FAST_PAGE_SIZE);
start = max_t(unsigned long, mapping->base, start);
end = min_t(unsigned long, mapping->base + mapping->size, end);
if (start >= end)
continue;
dev_dbg(dev, "iova allocator reserved 0x%pa-0x%pa\n",
&start, &end);
start = (start - mapping->base) >> FAST_PAGE_SHIFT;
end = (end - mapping->base) >> FAST_PAGE_SHIFT;
bitmap_set(mapping->bitmap, start, end - start);
}
spin_unlock_irqrestore(&mapping->lock, flags);
}
static int fast_smmu_errata_init(struct dma_iommu_mapping *mapping)
{
struct dma_fast_smmu_mapping *fast = mapping->fast;
int vmid = VMID_HLOS;
int min_iova_align = 0;
iommu_domain_get_attr(mapping->domain,
DOMAIN_ATTR_QCOM_MMU500_ERRATA_MIN_IOVA_ALIGN,
&min_iova_align);
iommu_domain_get_attr(mapping->domain, DOMAIN_ATTR_SECURE_VMID, &vmid);
if (vmid >= VMID_LAST || vmid < 0)
vmid = VMID_HLOS;
if (min_iova_align) {
fast->min_iova_align = ARM_SMMU_MIN_IOVA_ALIGN;
fast->guard_page = arm_smmu_errata_get_guard_page(vmid);
if (!fast->guard_page)
return -ENOMEM;
}
return 0;
}
/**
* fast_smmu_init_mapping
* @dev: valid struct device pointer
* @mapping: io address space mapping structure (returned from
* arm_iommu_create_mapping)
*
* Called the first time a device is attached to this mapping.
* Not for dma client use.
*/
int fast_smmu_init_mapping(struct device *dev,
struct dma_iommu_mapping *mapping)
{
int err;
struct iommu_domain *domain = mapping->domain;
struct iommu_pgtbl_info info;
u64 size = (u64)mapping->bits << PAGE_SHIFT;
if (mapping->base + size > (SZ_1G * 4ULL)) {
dev_err(dev, "Iova end address too large\n");
return -EINVAL;
}
mapping->fast = __fast_smmu_create_mapping_sized(mapping->base, size);
if (IS_ERR(mapping->fast))
return -ENOMEM;
mapping->fast->domain = domain;
mapping->fast->dev = dev;
err = fast_smmu_errata_init(mapping);
if (err)
goto release_mapping;
fast_smmu_reserve_pci_windows(dev, mapping->fast);
if (iommu_domain_get_attr(domain, DOMAIN_ATTR_PGTBL_INFO,
&info)) {
dev_err(dev, "Couldn't get page table info\n");
err = -EINVAL;
goto release_mapping;
}
mapping->fast->pgtbl_pmds = info.pmds;
if (iommu_domain_get_attr(domain, DOMAIN_ATTR_PAGE_TABLE_IS_COHERENT,
&mapping->fast->is_smmu_pt_coherent)) {
err = -EINVAL;
goto release_mapping;
}
mapping->fast->notifier.notifier_call = fast_smmu_notify;
av8l_register_notify(&mapping->fast->notifier);
mapping->ops = &fast_smmu_dma_ops;
return 0;
release_mapping:
kfree(mapping->fast->bitmap);
kfree(mapping->fast);
return err;
}
/**
* fast_smmu_release_mapping
* @kref: dma_iommu_mapping->kref
*
* Cleans up the given iommu mapping.
*/
void fast_smmu_release_mapping(struct kref *kref)
{
struct dma_iommu_mapping *mapping =
container_of(kref, struct dma_iommu_mapping, kref);
kvfree(mapping->fast->bitmap);
kfree(mapping->fast);
iommu_domain_free(mapping->domain);
kfree(mapping);
}

File diff suppressed because it is too large

View file

@ -0,0 +1,712 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
*/
#define pr_fmt(fmt) "io-pgtable-fast: " fmt
#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/io-pgtable-fast.h>
#include <asm/cacheflush.h>
#include <linux/vmalloc.h>
#include "io-pgtable.h"
#define AV8L_FAST_MAX_ADDR_BITS 48
/* Struct accessors */
#define iof_pgtable_to_data(x) \
container_of((x), struct av8l_fast_io_pgtable, iop)
#define iof_pgtable_ops_to_pgtable(x) \
container_of((x), struct io_pgtable, ops)
#define iof_pgtable_ops_to_data(x) \
iof_pgtable_to_data(iof_pgtable_ops_to_pgtable(x))
struct av8l_fast_io_pgtable {
struct io_pgtable iop;
av8l_fast_iopte *pgd;
av8l_fast_iopte *puds[4];
av8l_fast_iopte *pmds;
struct page **pages; /* page table memory */
};
/* Page table bits */
#define AV8L_FAST_PTE_TYPE_SHIFT 0
#define AV8L_FAST_PTE_TYPE_MASK 0x3
#define AV8L_FAST_PTE_TYPE_BLOCK 1
#define AV8L_FAST_PTE_TYPE_TABLE 3
#define AV8L_FAST_PTE_TYPE_PAGE 3
#define AV8L_FAST_PTE_NSTABLE (((av8l_fast_iopte)1) << 63)
#define AV8L_FAST_PTE_XN (((av8l_fast_iopte)3) << 53)
#define AV8L_FAST_PTE_AF (((av8l_fast_iopte)1) << 10)
#define AV8L_FAST_PTE_SH_NS (((av8l_fast_iopte)0) << 8)
#define AV8L_FAST_PTE_SH_OS (((av8l_fast_iopte)2) << 8)
#define AV8L_FAST_PTE_SH_IS (((av8l_fast_iopte)3) << 8)
#define AV8L_FAST_PTE_NS (((av8l_fast_iopte)1) << 5)
#define AV8L_FAST_PTE_VALID (((av8l_fast_iopte)1) << 0)
#define AV8L_FAST_PTE_ATTR_LO_MASK (((av8l_fast_iopte)0x3ff) << 2)
/* Ignore the contiguous bit for block splitting */
#define AV8L_FAST_PTE_ATTR_HI_MASK (((av8l_fast_iopte)6) << 52)
#define AV8L_FAST_PTE_ATTR_MASK (AV8L_FAST_PTE_ATTR_LO_MASK | \
AV8L_FAST_PTE_ATTR_HI_MASK)
#define AV8L_FAST_PTE_ADDR_MASK ((av8l_fast_iopte)0xfffffffff000)
/* Stage-1 PTE */
#define AV8L_FAST_PTE_AP_PRIV_RW (((av8l_fast_iopte)0) << 6)
#define AV8L_FAST_PTE_AP_RW (((av8l_fast_iopte)1) << 6)
#define AV8L_FAST_PTE_AP_PRIV_RO (((av8l_fast_iopte)2) << 6)
#define AV8L_FAST_PTE_AP_RO (((av8l_fast_iopte)3) << 6)
#define AV8L_FAST_PTE_ATTRINDX_SHIFT 2
#define AV8L_FAST_PTE_nG (((av8l_fast_iopte)1) << 11)
/* Stage-2 PTE */
#define AV8L_FAST_PTE_HAP_FAULT (((av8l_fast_iopte)0) << 6)
#define AV8L_FAST_PTE_HAP_READ (((av8l_fast_iopte)1) << 6)
#define AV8L_FAST_PTE_HAP_WRITE (((av8l_fast_iopte)2) << 6)
#define AV8L_FAST_PTE_MEMATTR_OIWB (((av8l_fast_iopte)0xf) << 2)
#define AV8L_FAST_PTE_MEMATTR_NC (((av8l_fast_iopte)0x5) << 2)
#define AV8L_FAST_PTE_MEMATTR_DEV (((av8l_fast_iopte)0x1) << 2)
/* Register bits */
#define ARM_32_LPAE_TCR_EAE (1 << 31)
#define ARM_64_LPAE_S2_TCR_RES1 (1 << 31)
#define AV8L_FAST_TCR_TG0_4K (0 << 14)
#define AV8L_FAST_TCR_TG0_64K (1 << 14)
#define AV8L_FAST_TCR_TG0_16K (2 << 14)
#define AV8L_FAST_TCR_SH0_SHIFT 12
#define AV8L_FAST_TCR_SH0_MASK 0x3
#define AV8L_FAST_TCR_SH_NS 0
#define AV8L_FAST_TCR_SH_OS 2
#define AV8L_FAST_TCR_SH_IS 3
#define AV8L_FAST_TCR_ORGN0_SHIFT 10
#define AV8L_FAST_TCR_IRGN0_SHIFT 8
#define AV8L_FAST_TCR_RGN_MASK 0x3
#define AV8L_FAST_TCR_RGN_NC 0
#define AV8L_FAST_TCR_RGN_WBWA 1
#define AV8L_FAST_TCR_RGN_WT 2
#define AV8L_FAST_TCR_RGN_WB 3
#define AV8L_FAST_TCR_SL0_SHIFT 6
#define AV8L_FAST_TCR_SL0_MASK 0x3
#define AV8L_FAST_TCR_T0SZ_SHIFT 0
#define AV8L_FAST_TCR_SZ_MASK 0xf
#define AV8L_FAST_TCR_PS_SHIFT 16
#define AV8L_FAST_TCR_PS_MASK 0x7
#define AV8L_FAST_TCR_IPS_SHIFT 32
#define AV8L_FAST_TCR_IPS_MASK 0x7
#define AV8L_FAST_TCR_PS_32_BIT 0x0ULL
#define AV8L_FAST_TCR_PS_36_BIT 0x1ULL
#define AV8L_FAST_TCR_PS_40_BIT 0x2ULL
#define AV8L_FAST_TCR_PS_42_BIT 0x3ULL
#define AV8L_FAST_TCR_PS_44_BIT 0x4ULL
#define AV8L_FAST_TCR_PS_48_BIT 0x5ULL
#define AV8L_FAST_TCR_EPD1_SHIFT 23
#define AV8L_FAST_TCR_EPD1_FAULT 1
#define AV8L_FAST_MAIR_ATTR_SHIFT(n) ((n) << 3)
#define AV8L_FAST_MAIR_ATTR_MASK 0xff
#define AV8L_FAST_MAIR_ATTR_DEVICE 0x04
#define AV8L_FAST_MAIR_ATTR_NC 0x44
#define AV8L_FAST_MAIR_ATTR_WBRWA 0xff
#define AV8L_FAST_MAIR_ATTR_UPSTREAM 0xf4
#define AV8L_FAST_MAIR_ATTR_IDX_NC 0
#define AV8L_FAST_MAIR_ATTR_IDX_CACHE 1
#define AV8L_FAST_MAIR_ATTR_IDX_DEV 2
#define AV8L_FAST_MAIR_ATTR_IDX_UPSTREAM 3
#define AV8L_FAST_PAGE_SHIFT 12
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB
#include <asm/cacheflush.h>
#include <linux/notifier.h>
static ATOMIC_NOTIFIER_HEAD(av8l_notifier_list);
void av8l_register_notify(struct notifier_block *nb)
{
atomic_notifier_chain_register(&av8l_notifier_list, nb);
}
EXPORT_SYMBOL(av8l_register_notify);
static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
{
if (unlikely(*ptep)) {
atomic_notifier_call_chain(
&av8l_notifier_list, MAPPED_OVER_STALE_TLB,
(void *) ptep);
pr_err("Tried to map over a non-vacant pte: 0x%llx @ %p\n",
*ptep, ptep);
pr_err("Nearby memory:\n");
print_hex_dump(KERN_ERR, "pgtbl: ", DUMP_PREFIX_ADDRESS,
32, 8, ptep - 16, 32 * sizeof(*ptep), false);
}
}
void av8l_fast_clear_stale_ptes(av8l_fast_iopte *pmds, bool skip_sync)
{
int i;
av8l_fast_iopte *pmdp = pmds;
for (i = 0; i < ((SZ_1G * 4UL) >> AV8L_FAST_PAGE_SHIFT); ++i) {
if (!(*pmdp & AV8L_FAST_PTE_VALID)) {
*pmdp = 0;
if (!skip_sync)
dmac_clean_range(pmdp, pmdp + 1);
}
pmdp++;
}
}
#else
static void __av8l_check_for_stale_tlb(av8l_fast_iopte *ptep)
{
}
#endif
/* caller must take care of cache maintenance on *ptep */
int av8l_fast_map_public(av8l_fast_iopte *ptep, phys_addr_t paddr, size_t size,
int prot)
{
int i, nptes = size >> AV8L_FAST_PAGE_SHIFT;
av8l_fast_iopte pte = AV8L_FAST_PTE_XN
| AV8L_FAST_PTE_TYPE_PAGE
| AV8L_FAST_PTE_AF
| AV8L_FAST_PTE_nG
| AV8L_FAST_PTE_SH_OS;
if (prot & IOMMU_MMIO)
pte |= (AV8L_FAST_MAIR_ATTR_IDX_DEV
<< AV8L_FAST_PTE_ATTRINDX_SHIFT);
else if (prot & IOMMU_CACHE)
pte |= (AV8L_FAST_MAIR_ATTR_IDX_CACHE
<< AV8L_FAST_PTE_ATTRINDX_SHIFT);
else if (prot & IOMMU_USE_UPSTREAM_HINT)
pte |= (AV8L_FAST_MAIR_ATTR_IDX_UPSTREAM
<< AV8L_FAST_PTE_ATTRINDX_SHIFT);
if (!(prot & IOMMU_WRITE))
pte |= AV8L_FAST_PTE_AP_RO;
else
pte |= AV8L_FAST_PTE_AP_RW;
paddr &= AV8L_FAST_PTE_ADDR_MASK;
for (i = 0; i < nptes; i++, paddr += SZ_4K) {
__av8l_check_for_stale_tlb(ptep + i);
*(ptep + i) = pte | paddr;
}
return 0;
}
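/*
 * Illustrative only, not part of this patch: the leaf PTE that
 * av8l_fast_map_public() composes for a writable, writeback-cacheable
 * mapping (prot = IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE) of a 4K page.
 */
static av8l_fast_iopte example_cached_rw_pte(phys_addr_t paddr)
{
	av8l_fast_iopte pte = AV8L_FAST_PTE_XN
			| AV8L_FAST_PTE_TYPE_PAGE
			| AV8L_FAST_PTE_AF
			| AV8L_FAST_PTE_nG
			| AV8L_FAST_PTE_SH_OS
			| AV8L_FAST_PTE_AP_RW
			| (AV8L_FAST_MAIR_ATTR_IDX_CACHE
				<< AV8L_FAST_PTE_ATTRINDX_SHIFT);

	/* Physical address bits go in [47:12], exactly as in the loop above */
	return pte | (paddr & AV8L_FAST_PTE_ADDR_MASK);
}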
static int av8l_fast_map(struct io_pgtable_ops *ops, unsigned long iova,
phys_addr_t paddr, size_t size, int prot)
{
struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, iova);
unsigned long nptes = size >> AV8L_FAST_PAGE_SHIFT;
av8l_fast_map_public(ptep, paddr, size, prot);
dmac_clean_range(ptep, ptep + nptes);
return 0;
}
static void __av8l_fast_unmap(av8l_fast_iopte *ptep, size_t size,
bool need_stale_tlb_tracking)
{
unsigned long nptes = size >> AV8L_FAST_PAGE_SHIFT;
int val = need_stale_tlb_tracking
? AV8L_FAST_PTE_UNMAPPED_NEED_TLBI
: 0;
memset(ptep, val, sizeof(*ptep) * nptes);
}
/* caller must take care of cache maintenance on *ptep */
void av8l_fast_unmap_public(av8l_fast_iopte *ptep, size_t size)
{
__av8l_fast_unmap(ptep, size, true);
}
static size_t av8l_fast_unmap(struct io_pgtable_ops *ops, unsigned long iova,
size_t size)
{
struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
struct io_pgtable *iop = &data->iop;
av8l_fast_iopte *ptep = iopte_pmd_offset(data->pmds, iova);
unsigned long nptes = size >> AV8L_FAST_PAGE_SHIFT;
__av8l_fast_unmap(ptep, size, false);
dmac_clean_range(ptep, ptep + nptes);
io_pgtable_tlb_flush_all(iop);
return size;
}
#if defined(CONFIG_ARM64)
#define FAST_PGDNDX(va) (((va) & 0x7fc0000000) >> 27)
#elif defined(CONFIG_ARM)
#define FAST_PGDNDX(va) (((va) & 0xc0000000) >> 27)
#endif
static phys_addr_t av8l_fast_iova_to_phys(struct io_pgtable_ops *ops,
unsigned long iova)
{
struct av8l_fast_io_pgtable *data = iof_pgtable_ops_to_data(ops);
av8l_fast_iopte pte, *pgdp, *pudp, *pmdp;
unsigned long pgd;
phys_addr_t phys;
const unsigned long pts = AV8L_FAST_PTE_TYPE_SHIFT;
const unsigned long ptm = AV8L_FAST_PTE_TYPE_MASK;
const unsigned long ptt = AV8L_FAST_PTE_TYPE_TABLE;
const unsigned long ptp = AV8L_FAST_PTE_TYPE_PAGE;
const av8l_fast_iopte am = AV8L_FAST_PTE_ADDR_MASK;
/* TODO: clean up some of these magic numbers... */
pgd = (unsigned long)data->pgd | FAST_PGDNDX(iova);
pgdp = (av8l_fast_iopte *)pgd;
pte = *pgdp;
if (((pte >> pts) & ptm) != ptt)
return 0;
pudp = phys_to_virt((pte & am) | ((iova & 0x3fe00000) >> 18));
pte = *pudp;
if (((pte >> pts) & ptm) != ptt)
return 0;
pmdp = phys_to_virt((pte & am) | ((iova & 0x1ff000) >> 9));
pte = *pmdp;
if (((pte >> pts) & ptm) != ptp)
return 0;
phys = pte & am;
return phys | (iova & 0xfff);
}
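/*
 * Decoding the magic numbers above (illustrative note, not part of the
 * patch): each mask/shift pair extracts a 9-bit table index pre-scaled by
 * sizeof(av8l_fast_iopte) == 8, i.e. a byte offset into the table, so the
 * shifts are 3 less than the usual level shifts (ARM64 build):
 *   pgd offset: (iova & 0x7fc0000000) >> 27   (index bits [38:30])
 *   pud offset: (iova & 0x3fe00000)   >> 18   (index bits [29:21])
 *   pmd offset: (iova & 0x1ff000)     >> 9    (index bits [20:12])
 * and iova[11:0] is the byte offset within the resulting 4K page.
 */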
static int av8l_fast_map_sg(struct io_pgtable_ops *ops, unsigned long iova,
struct scatterlist *sg, unsigned int nents,
int prot, size_t *size)
{
return -ENODEV;
}
static struct av8l_fast_io_pgtable *
av8l_fast_alloc_pgtable_data(struct io_pgtable_cfg *cfg)
{
struct av8l_fast_io_pgtable *data;
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return NULL;
data->iop.ops = (struct io_pgtable_ops) {
.map = av8l_fast_map,
.map_sg = av8l_fast_map_sg,
.unmap = av8l_fast_unmap,
.iova_to_phys = av8l_fast_iova_to_phys,
};
return data;
}
/*
* We need 1 page for the pgd, 4 pages for puds (1GB VA per pud page) and
* 2048 pages for pmds (each pud page contains 512 table entries, each
* pointing to a pmd).
*/
#define NUM_PGD_PAGES 1
#define NUM_PUD_PAGES 4
#define NUM_PMD_PAGES 2048
#define NUM_PGTBL_PAGES (NUM_PGD_PAGES + NUM_PUD_PAGES + NUM_PMD_PAGES)
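/*
 * Cost of prepopulation (illustrative note, not part of the patch):
 * 2048 pmd pages * 512 PTEs * 4 KB covers the whole 4 GB IOVA space, and
 * the full table set is (1 + 4 + 2048) pages * 4 KB, roughly 8 MB per
 * domain, all allocated up front by av8l_fast_prepopulate_pgtables() below
 * so the map/unmap fast paths never allocate table memory.
 */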
static int
av8l_fast_prepopulate_pgtables(struct av8l_fast_io_pgtable *data,
struct io_pgtable_cfg *cfg, void *cookie)
{
int i, j, pg = 0;
struct page **pages, *page;
pages = kmalloc(sizeof(*pages) * NUM_PGTBL_PAGES,
GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
if (!pages)
pages = vmalloc(sizeof(*pages) * NUM_PGTBL_PAGES);
if (!pages)
return -ENOMEM;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
goto err_free_pages_arr;
pages[pg++] = page;
data->pgd = page_address(page);
/*
* We need 2048 entries at level 2 to map 4GB of VA space. A page
* can hold 512 entries, so we need 4 pages.
*/
for (i = 0; i < 4; ++i) {
av8l_fast_iopte pte, *ptep;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
goto err_free_pages;
pages[pg++] = page;
data->puds[i] = page_address(page);
pte = page_to_phys(page) | AV8L_FAST_PTE_TYPE_TABLE;
ptep = ((av8l_fast_iopte *)data->pgd) + i;
*ptep = pte;
}
dmac_clean_range(data->pgd, data->pgd + 4);
/*
* We have 4 puds, each of which can point to 512 pmds, so we'll
* have 2048 pmds, each of which can hold 512 ptes, for a grand
* total of 2048*512=1048576 PTEs.
*/
for (i = 0; i < 4; ++i) {
for (j = 0; j < 512; ++j) {
av8l_fast_iopte pte, *pudp;
void *addr;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
goto err_free_pages;
pages[pg++] = page;
addr = page_address(page);
dmac_clean_range(addr, addr + SZ_4K);
pte = page_to_phys(page) | AV8L_FAST_PTE_TYPE_TABLE;
pudp = data->puds[i] + j;
*pudp = pte;
}
dmac_clean_range(data->puds[i], data->puds[i] + 512);
}
if (WARN_ON(pg != NUM_PGTBL_PAGES))
goto err_free_pages;
/*
* We map the pmds into a virtually contiguous space so that we
* don't have to traverse the first two levels of the page tables
* to find the appropriate pmd. Instead, it will be a simple
* offset from the virtual base of the pmds.
*/
data->pmds = vmap(&pages[NUM_PGD_PAGES + NUM_PUD_PAGES], NUM_PMD_PAGES,
VM_IOREMAP, PAGE_KERNEL);
if (!data->pmds)
goto err_free_pages;
data->pages = pages;
return 0;
err_free_pages:
for (i = 0; i < pg; ++i)
__free_page(pages[i]);
err_free_pages_arr:
kvfree(pages);
return -ENOMEM;
}
static struct io_pgtable *
av8l_fast_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
{
u64 reg;
struct av8l_fast_io_pgtable *data =
av8l_fast_alloc_pgtable_data(cfg);
if (!data)
return NULL;
/* restrict according to the fast map requirements */
cfg->ias = 32;
cfg->pgsize_bitmap = SZ_4K;
/* TCR */
if (cfg->quirks & IO_PGTABLE_QUIRK_QCOM_USE_UPSTREAM_HINT)
reg = (AV8L_FAST_TCR_SH_OS << AV8L_FAST_TCR_SH0_SHIFT) |
(AV8L_FAST_TCR_RGN_NC << AV8L_FAST_TCR_IRGN0_SHIFT) |
(AV8L_FAST_TCR_RGN_WBWA << AV8L_FAST_TCR_ORGN0_SHIFT);
else if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)
reg = (AV8L_FAST_TCR_SH_OS << AV8L_FAST_TCR_SH0_SHIFT) |
(AV8L_FAST_TCR_RGN_WBWA << AV8L_FAST_TCR_IRGN0_SHIFT) |
(AV8L_FAST_TCR_RGN_WBWA << AV8L_FAST_TCR_ORGN0_SHIFT);
else
reg = (AV8L_FAST_TCR_SH_OS << AV8L_FAST_TCR_SH0_SHIFT) |
(AV8L_FAST_TCR_RGN_NC << AV8L_FAST_TCR_IRGN0_SHIFT) |
(AV8L_FAST_TCR_RGN_NC << AV8L_FAST_TCR_ORGN0_SHIFT);
reg |= AV8L_FAST_TCR_TG0_4K;
switch (cfg->oas) {
case 32:
reg |= (AV8L_FAST_TCR_PS_32_BIT << AV8L_FAST_TCR_IPS_SHIFT);
break;
case 36:
reg |= (AV8L_FAST_TCR_PS_36_BIT << AV8L_FAST_TCR_IPS_SHIFT);
break;
case 40:
reg |= (AV8L_FAST_TCR_PS_40_BIT << AV8L_FAST_TCR_IPS_SHIFT);
break;
case 42:
reg |= (AV8L_FAST_TCR_PS_42_BIT << AV8L_FAST_TCR_IPS_SHIFT);
break;
case 44:
reg |= (AV8L_FAST_TCR_PS_44_BIT << AV8L_FAST_TCR_IPS_SHIFT);
break;
case 48:
reg |= (AV8L_FAST_TCR_PS_48_BIT << AV8L_FAST_TCR_IPS_SHIFT);
break;
default:
goto out_free_data;
}
reg |= (64ULL - cfg->ias) << AV8L_FAST_TCR_T0SZ_SHIFT;
reg |= AV8L_FAST_TCR_EPD1_FAULT << AV8L_FAST_TCR_EPD1_SHIFT;
#if defined(CONFIG_ARM)
reg |= ARM_32_LPAE_TCR_EAE;
#endif
cfg->av8l_fast_cfg.tcr = reg;
/* MAIRs */
reg = (AV8L_FAST_MAIR_ATTR_NC
<< AV8L_FAST_MAIR_ATTR_SHIFT(AV8L_FAST_MAIR_ATTR_IDX_NC)) |
(AV8L_FAST_MAIR_ATTR_WBRWA
<< AV8L_FAST_MAIR_ATTR_SHIFT(AV8L_FAST_MAIR_ATTR_IDX_CACHE)) |
(AV8L_FAST_MAIR_ATTR_DEVICE
<< AV8L_FAST_MAIR_ATTR_SHIFT(AV8L_FAST_MAIR_ATTR_IDX_DEV)) |
(AV8L_FAST_MAIR_ATTR_UPSTREAM
<< AV8L_FAST_MAIR_ATTR_SHIFT(AV8L_FAST_MAIR_ATTR_IDX_UPSTREAM));
cfg->av8l_fast_cfg.mair[0] = reg;
cfg->av8l_fast_cfg.mair[1] = 0;
/* Allocate all page table memory! */
if (av8l_fast_prepopulate_pgtables(data, cfg, cookie))
goto out_free_data;
cfg->av8l_fast_cfg.pmds = data->pmds;
/* TTBRs */
cfg->av8l_fast_cfg.ttbr[0] = virt_to_phys(data->pgd);
cfg->av8l_fast_cfg.ttbr[1] = 0;
return &data->iop;
out_free_data:
kfree(data);
return NULL;
}
static void av8l_fast_free_pgtable(struct io_pgtable *iop)
{
int i;
struct av8l_fast_io_pgtable *data = iof_pgtable_to_data(iop);
vunmap(data->pmds);
for (i = 0; i < NUM_PGTBL_PAGES; ++i)
__free_page(data->pages[i]);
kvfree(data->pages);
kfree(data);
}
struct io_pgtable_init_fns io_pgtable_av8l_fast_init_fns = {
.alloc = av8l_fast_alloc_pgtable,
.free = av8l_fast_free_pgtable,
};
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_SELFTEST
#include <linux/dma-contiguous.h>
static struct io_pgtable_cfg *cfg_cookie;
static void dummy_tlb_flush_all(void *cookie)
{
WARN_ON(cookie != cfg_cookie);
}
static void dummy_tlb_add_flush(unsigned long iova, size_t size, size_t granule,
bool leaf, void *cookie)
{
WARN_ON(cookie != cfg_cookie);
WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}
static void dummy_tlb_sync(void *cookie)
{
WARN_ON(cookie != cfg_cookie);
}
static struct iommu_gather_ops dummy_tlb_ops __initdata = {
.tlb_flush_all = dummy_tlb_flush_all,
.tlb_add_flush = dummy_tlb_add_flush,
.tlb_sync = dummy_tlb_sync,
};
/*
* Returns true if, according to ops, the whole iova range is mapped to the
* contiguous physical range starting at phys_start.
*/
static bool av8l_fast_range_has_specific_mapping(struct io_pgtable_ops *ops,
const unsigned long iova_start,
const phys_addr_t phys_start,
const size_t size)
{
u64 iova = iova_start;
phys_addr_t phys = phys_start;
while (iova < (iova_start + size)) {
/* + 42 just to make sure offsetting is working */
if (ops->iova_to_phys(ops, iova + 42) != (phys + 42))
return false;
iova += SZ_4K;
phys += SZ_4K;
}
return true;
}
static int __init av8l_fast_positive_testing(void)
{
int failed = 0;
u64 iova;
struct io_pgtable_ops *ops;
struct io_pgtable_cfg cfg;
struct av8l_fast_io_pgtable *data;
av8l_fast_iopte *pmds;
u64 max = SZ_1G * 4ULL - 1;
cfg = (struct io_pgtable_cfg) {
.quirks = 0,
.tlb = &dummy_tlb_ops,
.ias = 32,
.oas = 32,
.pgsize_bitmap = SZ_4K,
};
cfg_cookie = &cfg;
ops = alloc_io_pgtable_ops(ARM_V8L_FAST, &cfg, &cfg);
if (WARN_ON(!ops))
return 1;
data = iof_pgtable_ops_to_data(ops);
pmds = data->pmds;
/* map the entire 4GB VA space with 4K map calls */
for (iova = 0; iova < max; iova += SZ_4K) {
if (WARN_ON(ops->map(ops, iova, iova, SZ_4K, IOMMU_READ))) {
failed++;
continue;
}
}
if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, 0, 0,
max)))
failed++;
/* unmap it all */
for (iova = 0; iova < max; iova += SZ_4K) {
if (WARN_ON(ops->unmap(ops, iova, SZ_4K) != SZ_4K))
failed++;
}
/* sweep up TLB proving PTEs */
av8l_fast_clear_stale_ptes(pmds, false);
/* map the entire 4GB VA space with 8K map calls */
for (iova = 0; iova < max; iova += SZ_8K) {
if (WARN_ON(ops->map(ops, iova, iova, SZ_8K, IOMMU_READ))) {
failed++;
continue;
}
}
if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, 0, 0,
max)))
failed++;
/* unmap it all with 8K unmap calls */
for (iova = 0; iova < max; iova += SZ_8K) {
if (WARN_ON(ops->unmap(ops, iova, SZ_8K) != SZ_8K))
failed++;
}
/* sweep up TLB proving PTEs */
av8l_fast_clear_stale_ptes(pmds, false);
/* map the entire 4GB VA space with 16K map calls */
for (iova = 0; iova < max; iova += SZ_16K) {
if (WARN_ON(ops->map(ops, iova, iova, SZ_16K, IOMMU_READ))) {
failed++;
continue;
}
}
if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, 0, 0,
max)))
failed++;
/* unmap it all */
for (iova = 0; iova < max; iova += SZ_16K) {
if (WARN_ON(ops->unmap(ops, iova, SZ_16K) != SZ_16K))
failed++;
}
/* sweep up TLB proving PTEs */
av8l_fast_clear_stale_ptes(pmds, false);
/* map the entire 4GB VA space with 64K map calls */
for (iova = 0; iova < max; iova += SZ_64K) {
if (WARN_ON(ops->map(ops, iova, iova, SZ_64K, IOMMU_READ))) {
failed++;
continue;
}
}
if (WARN_ON(!av8l_fast_range_has_specific_mapping(ops, 0, 0,
max)))
failed++;
/* unmap it all at once */
if (WARN_ON(ops->unmap(ops, 0, max) != max))
failed++;
free_io_pgtable_ops(ops);
return failed;
}
static int __init av8l_fast_do_selftests(void)
{
int failed = 0;
failed += av8l_fast_positive_testing();
pr_err("selftest: completed with %d failures\n", failed);
return 0;
}
subsys_initcall(av8l_fast_do_selftests);
#endif

View file

@ -18,9 +18,15 @@
* Author: Will Deacon <will.deacon@arm.com>
*/
#define pr_fmt(fmt) "io-pgtable: " fmt
#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/iommu.h>
#include <linux/debugfs.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include "io-pgtable.h"
@ -35,8 +41,13 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
#ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S
[ARM_V7S] = &io_pgtable_arm_v7s_init_fns,
#endif
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST
[ARM_V8L_FAST] = &io_pgtable_av8l_fast_init_fns,
#endif
};
static struct dentry *io_pgtable_top;
struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt,
struct io_pgtable_cfg *cfg,
void *cookie)
@ -77,3 +88,56 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops)
io_pgtable_tlb_flush_all(iop);
io_pgtable_init_table[iop->fmt]->free(iop);
}
static atomic_t pages_allocated;
void *io_pgtable_alloc_pages_exact(struct io_pgtable_cfg *cfg, void *cookie,
size_t size, gfp_t gfp_mask)
{
void *ret;
if (cfg->tlb->alloc_pages_exact)
ret = cfg->tlb->alloc_pages_exact(cookie, size, gfp_mask);
else
ret = alloc_pages_exact(size, gfp_mask);
if (likely(ret))
atomic_add(1 << get_order(size), &pages_allocated);
return ret;
}
void io_pgtable_free_pages_exact(struct io_pgtable_cfg *cfg, void *cookie,
void *virt, size_t size)
{
if (cfg->tlb->free_pages_exact)
cfg->tlb->free_pages_exact(cookie, virt, size);
else
free_pages_exact(virt, size);
atomic_sub(1 << get_order(size), &pages_allocated);
}
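/*
 * Illustrative only, not part of this patch: how a page-table format could
 * use the accounting helpers above instead of calling alloc_pages_exact()
 * and free_pages_exact() directly. Function names are hypothetical.
 */
static void *example_alloc_l2_table(struct io_pgtable_cfg *cfg, void *cookie)
{
	/* One 4K table; the allocation shows up in the "pages" debugfs counter */
	return io_pgtable_alloc_pages_exact(cfg, cookie, SZ_4K,
					    GFP_KERNEL | __GFP_ZERO);
}

static void example_free_l2_table(struct io_pgtable_cfg *cfg, void *cookie,
				  void *table)
{
	io_pgtable_free_pages_exact(cfg, cookie, table, SZ_4K);
}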
static int io_pgtable_init(void)
{
io_pgtable_top = debugfs_create_dir("io-pgtable", iommu_debugfs_top);
if (!io_pgtable_top)
return -ENODEV;
if (!debugfs_create_atomic_t("pages", 0600,
io_pgtable_top, &pages_allocated)) {
debugfs_remove_recursive(io_pgtable_top);
return -ENODEV;
}
return 0;
}
static void io_pgtable_exit(void)
{
debugfs_remove_recursive(io_pgtable_top);
}
module_init(io_pgtable_init);
module_exit(io_pgtable_exit);

View file

@ -3,6 +3,9 @@
#define __IO_PGTABLE_H
#include <linux/bitops.h>
#include <linux/scatterlist.h>
#include <soc/qcom/msm_tz_smmu.h>
/*
* Public API for use by IOMMU drivers
*/
@ -12,6 +15,8 @@ enum io_pgtable_fmt {
ARM_64_LPAE_S1,
ARM_64_LPAE_S2,
ARM_V7S,
ARM_V8L_FAST,
ARM_MSM_SECURE,
IO_PGTABLE_NUM_FMTS,
};
@ -23,6 +28,10 @@ enum io_pgtable_fmt {
* @tlb_sync: Ensure any queued TLB invalidation has taken effect, and
* any corresponding page table updates are visible to the
* IOMMU.
* @alloc_pages_exact: Allocate page table memory (optional, defaults to
* alloc_pages_exact)
* @free_pages_exact: Free page table memory (optional, defaults to
* free_pages_exact)
*
* Note that these can all be called in atomic context and must therefore
* not block.
@ -32,6 +41,8 @@ struct iommu_gather_ops {
void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule,
bool leaf, void *cookie);
void (*tlb_sync)(void *cookie);
void *(*alloc_pages_exact)(void *cookie, size_t size, gfp_t gfp_mask);
void (*free_pages_exact)(void *cookie, void *virt, size_t size);
};
/**
@ -67,16 +78,36 @@ struct io_pgtable_cfg {
* when the SoC is in "4GB mode" and they can only access the high
* remap of DRAM (0x1_00000000 to 0x1_ffffffff).
*
* IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever
* be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
* software-emulated IOMMU), such that pagetable updates need not
* be treated as explicit DMA data.
*
* IO_PGTABLE_QUIRK_QSMMUV500_NON_SHAREABLE:
* Having page tables which are non coherent, but cached in a
* system cache requires SH=Non-Shareable. This applies to the
* qsmmuv500 model. For data buffers SH=Non-Shareable is not
* required.
* IO_PGTABLE_QUIRK_QCOM_USE_UPSTREAM_HINT: Override the attributes
* set in TCR for the page table walker. Use attributes specified
* by the upstream hw instead.
*
* IO_PGTABLE_QUIRK_QCOM_USE_LLC_NWA: Override the attributes
* set in TCR for the page table walker with Write-Back,
* no Write-Allocate cacheable encoding.
*
*/
#define IO_PGTABLE_QUIRK_ARM_NS BIT(0)
#define IO_PGTABLE_QUIRK_NO_PERMS BIT(1)
#define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2)
#define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3)
#define IO_PGTABLE_QUIRK_NO_DMA BIT(4)
#define IO_PGTABLE_QUIRK_QSMMUV500_NON_SHAREABLE BIT(5)
#define IO_PGTABLE_QUIRK_QCOM_USE_UPSTREAM_HINT BIT(6)
#define IO_PGTABLE_QUIRK_QCOM_USE_LLC_NWA BIT(7)
unsigned long quirks;
unsigned long pgsize_bitmap;
unsigned int ias;
@ -103,15 +134,33 @@ struct io_pgtable_cfg {
u32 nmrr;
u32 prrr;
} arm_v7s_cfg;
struct {
u64 ttbr[2];
u64 tcr;
u64 mair[2];
void *pmds;
} av8l_fast_cfg;
struct {
enum tz_smmu_device_id sec_id;
int cbndx;
} arm_msm_secure_cfg;
};
};
/**
* struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
*
* @map: Map a physically contiguous memory region.
* @map_sg: Map a scatterlist. Returns the number of bytes mapped,
* or 0 on failure. The size parameter contains the size
* of the partial mapping in case of failure.
* @unmap: Unmap a physically contiguous memory region.
* @iova_to_phys: Translate iova to physical address.
* @is_iova_coherent: Checks coherency of given IOVA. Returns True if coherent
* and False if non-coherent.
* @iova_to_pte: Translate iova to Page Table Entry (PTE).
*
* These functions map directly onto the iommu_ops member functions with
* the same names.
@ -121,8 +170,16 @@ struct io_pgtable_ops {
phys_addr_t paddr, size_t size, int prot);
size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
size_t size);
int (*map_sg)(struct io_pgtable_ops *ops, unsigned long iova,
struct scatterlist *sg, unsigned int nents,
int prot, size_t *size);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
bool (*is_iova_coherent)(struct io_pgtable_ops *ops,
unsigned long iova);
uint64_t (*iova_to_pte)(struct io_pgtable_ops *ops,
unsigned long iova);
};
/**
@ -173,17 +230,23 @@ struct io_pgtable {
static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
{
if (!iop->cfg.tlb)
return;
iop->cfg.tlb->tlb_flush_all(iop->cookie);
}
static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop,
unsigned long iova, size_t size, size_t granule, bool leaf)
{
if (!iop->cfg.tlb)
return;
iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie);
}
static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
{
if (!iop->cfg.tlb)
return;
iop->cfg.tlb->tlb_sync(iop->cookie);
}
@ -204,5 +267,31 @@ extern struct io_pgtable_init_fns io_pgtable_arm_32_lpae_s2_init_fns;
extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns;
extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns;
extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns;
extern struct io_pgtable_init_fns io_pgtable_av8l_fast_init_fns;
extern struct io_pgtable_init_fns io_pgtable_arm_msm_secure_init_fns;
/**
* io_pgtable_alloc_pages_exact:
* allocate an exact number of physically-contiguous pages.
* @size: the number of bytes to allocate
* @gfp_mask: GFP flags for the allocation
*
* Like alloc_pages_exact(), but with some additional accounting for debug
* purposes.
*/
void *io_pgtable_alloc_pages_exact(struct io_pgtable_cfg *cfg, void *cookie,
size_t size, gfp_t gfp_mask);
/**
* io_pgtable_free_pages_exact:
* release memory allocated via io_pgtable_alloc_pages_exact()
* @virt: the value returned by alloc_pages_exact.
* @size: size of allocation, same value as passed to alloc_pages_exact().
*
* Like free_pages_exact(), but with some additional accounting for debug
* purposes.
*/
void io_pgtable_free_pages_exact(struct io_pgtable_cfg *cfg, void *cookie,
void *virt, size_t size);
#endif /* __IO_PGTABLE_H */

drivers/iommu/iommu-debug.c (new file, 2430 lines): file diff suppressed because it is too large

View file

@ -0,0 +1,27 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
*/
#ifndef IOMMU_DEBUG_H
#define IOMMU_DEBUG_H
#ifdef CONFIG_IOMMU_DEBUG_TRACKING
void iommu_debug_attach_device(struct iommu_domain *domain, struct device *dev);
void iommu_debug_domain_remove(struct iommu_domain *domain);
#else /* !CONFIG_IOMMU_DEBUG_TRACKING */
static inline void iommu_debug_attach_device(struct iommu_domain *domain,
struct device *dev)
{
}
static inline void iommu_debug_domain_remove(struct iommu_domain *domain)
{
}
#endif /* CONFIG_IOMMU_DEBUG_TRACKING */
#endif /* IOMMU_DEBUG_H */

View file

@ -31,9 +31,12 @@
#include <linux/err.h>
#include <linux/pci.h>
#include <linux/bitops.h>
#include <linux/debugfs.h>
#include <linux/property.h>
#include <trace/events/iommu.h>
#include "iommu-debug.h"
static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);
static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
@ -1166,7 +1169,6 @@ static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
if (err)
goto out_err;
return 0;
out_err:
@ -1265,6 +1267,7 @@ static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
domain->type = type;
/* Assume all sizes by default; the driver may override this later */
domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
domain->is_debug_domain = false;
return domain;
}
@ -1277,6 +1280,7 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc);
void iommu_domain_free(struct iommu_domain *domain)
{
iommu_debug_domain_remove(domain);
domain->ops->domain_free(domain);
}
EXPORT_SYMBOL_GPL(iommu_domain_free);
@ -1293,8 +1297,10 @@ static int __iommu_attach_device(struct iommu_domain *domain,
return -ENODEV;
ret = domain->ops->attach_dev(domain, dev);
if (!ret) {
trace_attach_device_to_domain(dev);
iommu_debug_attach_device(domain, dev);
}
return ret;
}
@ -1474,8 +1480,34 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
}
EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
phys_addr_t iommu_iova_to_phys_hard(struct iommu_domain *domain,
dma_addr_t iova)
{
if (unlikely(domain->ops->iova_to_phys_hard == NULL))
return 0;
return domain->ops->iova_to_phys_hard(domain, iova);
}
uint64_t iommu_iova_to_pte(struct iommu_domain *domain,
dma_addr_t iova)
{
if (unlikely(domain->ops->iova_to_pte == NULL))
return 0;
return domain->ops->iova_to_pte(domain, iova);
}
bool iommu_is_iova_coherent(struct iommu_domain *domain, dma_addr_t iova)
{
if (unlikely(domain->ops->is_iova_coherent == NULL))
return 0;
return domain->ops->is_iova_coherent(domain, iova);
}
size_t iommu_pgsize(unsigned long pgsize_bitmap,
unsigned long addr_merge, size_t size)
{
unsigned int pgsize_idx;
size_t pgsize;
@ -1494,10 +1526,14 @@ static size_t iommu_pgsize(struct iommu_domain *domain,
pgsize = (1UL << (pgsize_idx + 1)) - 1;
/* throw away page sizes not supported by the hardware */
pgsize &= pgsize_bitmap;
/* make sure we're still sane */
if (!pgsize) {
pr_err("invalid pgsize/addr/size! 0x%lx 0x%lx 0x%zx\n",
pgsize_bitmap, addr_merge, size);
BUG();
}
/* pick the biggest page */
pgsize_idx = __fls(pgsize);
@ -1539,7 +1575,8 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
while (size) {
size_t pgsize = iommu_pgsize(domain->pgsize_bitmap,
iova | paddr, size);
pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
iova, &paddr, pgsize);
@ -1600,14 +1637,14 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
* or we hit an area that isn't mapped.
*/
while (unmapped < size) {
size_t left = size - unmapped;
unmapped_page = ops->unmap(domain, iova, left);
if (!unmapped_page)
break;
if (sync && ops->iotlb_range_add)
ops->iotlb_range_add(domain, iova, left);
pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
iova, unmapped_page);
@ -1742,12 +1779,20 @@ int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
}
EXPORT_SYMBOL_GPL(report_iommu_fault);
struct dentry *iommu_debugfs_top;
static int __init iommu_init(void)
{
iommu_group_kset = kset_create_and_add("iommu_groups",
NULL, kernel_kobj);
BUG_ON(!iommu_group_kset);
iommu_debugfs_top = debugfs_create_dir("iommu", NULL);
if (!iommu_debugfs_top) {
pr_err("Couldn't create iommu debugfs directory\n");
return -ENODEV;
}
return 0;
}
core_initcall(iommu_init);
@ -1833,6 +1878,46 @@ void iommu_put_resv_regions(struct device *dev, struct list_head *list)
ops->put_resv_regions(dev, list);
}
/**
* iommu_trigger_fault() - trigger an IOMMU fault
* @domain: iommu domain
*
* Triggers a fault on the device to which this domain is attached.
*
* This function should only be used for debugging purposes, for obvious
* reasons.
*/
void iommu_trigger_fault(struct iommu_domain *domain, unsigned long flags)
{
if (domain->ops->trigger_fault)
domain->ops->trigger_fault(domain, flags);
}
/**
* iommu_reg_read() - read an IOMMU register
*
* Reads the IOMMU register at the given offset.
*/
unsigned long iommu_reg_read(struct iommu_domain *domain, unsigned long offset)
{
if (domain->ops->reg_read)
return domain->ops->reg_read(domain, offset);
return 0;
}
/**
* iommu_reg_write() - write an IOMMU register
*
* Writes the given value to the IOMMU register at the given offset.
*/
void iommu_reg_write(struct iommu_domain *domain, unsigned long offset,
unsigned long val)
{
if (domain->ops->reg_write)
domain->ops->reg_write(domain, offset, val);
}
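/*
 * Illustrative only, not part of this patch: a debug-only caller exercising
 * the hooks above. The register offset 0x0 is a placeholder; real offsets
 * are implementation specific.
 */
static void example_debug_domain(struct iommu_domain *domain)
{
	unsigned long val;

	iommu_trigger_fault(domain, 0);
	val = iommu_reg_read(domain, 0x0);
	pr_debug("reg[0x0] = 0x%lx\n", val);
	iommu_reg_write(domain, 0x0, val);
}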
struct iommu_resv_region *iommu_alloc_resv_region(phys_addr_t start,
size_t length, int prot,
enum iommu_resv_type type)
@ -1976,3 +2061,23 @@ int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
return 0;
}
EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
/*
* Return the ID associated with a PCI device.
*/
int iommu_fwspec_get_id(struct device *dev, u32 *id)
{
struct iommu_fwspec *fwspec = dev->iommu_fwspec;
if (!fwspec)
return -EINVAL;
if (!dev_is_pci(dev))
return -EINVAL;
if (fwspec->num_ids != 1)
return -EINVAL;
*id = fwspec->ids[0];
return 0;
}

View file

@ -0,0 +1,473 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
*/
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/mutex.h>
#include <linux/err.h>
#include <asm/barrier.h>
#include <linux/msm_dma_iommu_mapping.h>
/**
* struct msm_iommu_map - represents a mapping of an ion buffer to an iommu
* @lnode - list node to exist in the buffer's list of iommu mappings
* @dev - Device this is mapped to. Used as key
* @sgl - The scatterlist for this mapping
* @nents - Number of entries in sgl
* @dir - The direction for the map.
* @meta - Backpointer to the meta this mapping belongs to.
* @ref - for reference counting this mapping
* @attrs - dma mapping attributes
* @buf_start_addr - address of start of buffer
*
* Represents a mapping of one dma_buf buffer to a particular device
* and address range. There may exist other mappings of this buffer in
* different devices. All mappings will have the same cacheability and security.
*/
struct msm_iommu_map {
struct list_head lnode;
struct rb_node node;
struct device *dev;
struct scatterlist *sgl;
unsigned int nents;
enum dma_data_direction dir;
struct msm_iommu_meta *meta;
struct kref ref;
unsigned long attrs;
dma_addr_t buf_start_addr;
};
struct msm_iommu_meta {
struct rb_node node;
struct list_head iommu_maps;
struct kref ref;
struct mutex lock;
void *buffer;
};
static struct rb_root iommu_root;
static DEFINE_MUTEX(msm_iommu_map_mutex);
static void msm_iommu_meta_add(struct msm_iommu_meta *meta)
{
struct rb_root *root = &iommu_root;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct msm_iommu_meta *entry;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct msm_iommu_meta, node);
if (meta->buffer < entry->buffer)
p = &(*p)->rb_left;
else if (meta->buffer > entry->buffer)
p = &(*p)->rb_right;
else
pr_err("%s: dma_buf %p already exists\n", __func__,
entry->buffer);
}
rb_link_node(&meta->node, parent, p);
rb_insert_color(&meta->node, root);
}
static struct msm_iommu_meta *msm_iommu_meta_lookup(void *buffer)
{
struct rb_root *root = &iommu_root;
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
struct msm_iommu_meta *entry = NULL;
while (*p) {
parent = *p;
entry = rb_entry(parent, struct msm_iommu_meta, node);
if (buffer < entry->buffer)
p = &(*p)->rb_left;
else if (buffer > entry->buffer)
p = &(*p)->rb_right;
else
return entry;
}
return NULL;
}
static void msm_iommu_add(struct msm_iommu_meta *meta,
struct msm_iommu_map *iommu)
{
INIT_LIST_HEAD(&iommu->lnode);
list_add(&iommu->lnode, &meta->iommu_maps);
}
static struct msm_iommu_map *msm_iommu_lookup(struct msm_iommu_meta *meta,
struct device *dev)
{
struct msm_iommu_map *entry;
list_for_each_entry(entry, &meta->iommu_maps, lnode) {
if (entry->dev == dev)
return entry;
}
return NULL;
}
static struct msm_iommu_meta *msm_iommu_meta_create(struct dma_buf *dma_buf)
{
struct msm_iommu_meta *meta;
meta = kzalloc(sizeof(*meta), GFP_KERNEL);
if (!meta)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&meta->iommu_maps);
meta->buffer = dma_buf->priv;
kref_init(&meta->ref);
mutex_init(&meta->lock);
msm_iommu_meta_add(meta);
return meta;
}
static void msm_iommu_meta_put(struct msm_iommu_meta *meta);
static struct scatterlist *clone_sgl(struct scatterlist *sg, int nents)
{
struct scatterlist *next, *s;
int i;
struct sg_table table;
if (sg_alloc_table(&table, nents, GFP_KERNEL))
return NULL;
next = table.sgl;
for_each_sg(sg, s, nents, i) {
*next = *s;
next = sg_next(next);
}
return table.sgl;
}
static inline int __msm_dma_map_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
struct dma_buf *dma_buf,
unsigned long attrs)
{
struct msm_iommu_map *iommu_map;
struct msm_iommu_meta *iommu_meta = NULL;
int ret = 0;
bool extra_meta_ref_taken = false;
int late_unmap = !(attrs & DMA_ATTR_NO_DELAYED_UNMAP);
mutex_lock(&msm_iommu_map_mutex);
iommu_meta = msm_iommu_meta_lookup(dma_buf->priv);
if (!iommu_meta) {
iommu_meta = msm_iommu_meta_create(dma_buf);
if (IS_ERR(iommu_meta)) {
mutex_unlock(&msm_iommu_map_mutex);
ret = PTR_ERR(iommu_meta);
goto out;
}
if (late_unmap) {
kref_get(&iommu_meta->ref);
extra_meta_ref_taken = true;
}
} else {
kref_get(&iommu_meta->ref);
}
mutex_unlock(&msm_iommu_map_mutex);
mutex_lock(&iommu_meta->lock);
iommu_map = msm_iommu_lookup(iommu_meta, dev);
if (!iommu_map) {
iommu_map = kmalloc(sizeof(*iommu_map), GFP_ATOMIC);
if (!iommu_map) {
ret = -ENOMEM;
goto out_unlock;
}
ret = dma_map_sg_attrs(dev, sg, nents, dir, attrs);
if (!ret) {
kfree(iommu_map);
goto out_unlock;
}
iommu_map->sgl = clone_sgl(sg, nents);
if (!iommu_map->sgl) {
kfree(iommu_map);
ret = -ENOMEM;
goto out_unlock;
}
iommu_map->nents = nents;
iommu_map->dev = dev;
iommu_map->dir = dir;
iommu_map->attrs = attrs;
iommu_map->buf_start_addr = sg_phys(sg);
kref_init(&iommu_map->ref);
if (late_unmap)
kref_get(&iommu_map->ref);
iommu_map->meta = iommu_meta;
msm_iommu_add(iommu_meta, iommu_map);
} else {
if (nents == iommu_map->nents &&
dir == iommu_map->dir &&
(attrs & ~DMA_ATTR_SKIP_CPU_SYNC) ==
(iommu_map->attrs & ~DMA_ATTR_SKIP_CPU_SYNC) &&
sg_phys(sg) == iommu_map->buf_start_addr) {
struct scatterlist *sg_tmp = sg;
struct scatterlist *map_sg;
int i;
for_each_sg(iommu_map->sgl, map_sg, nents, i) {
sg_dma_address(sg_tmp) = sg_dma_address(map_sg);
sg_dma_len(sg_tmp) = sg_dma_len(map_sg);
if (sg_dma_len(map_sg) == 0)
break;
sg_tmp = sg_next(sg_tmp);
if (sg_tmp == NULL)
break;
}
kref_get(&iommu_map->ref);
if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
dma_sync_sg_for_device(dev, iommu_map->sgl,
iommu_map->nents, iommu_map->dir);
if (is_device_dma_coherent(dev))
/*
* Ensure all outstanding changes for coherent
* buffers are applied to the cache before any
* DMA occurs.
*/
dmb(ish);
ret = nents;
} else {
bool start_diff = (sg_phys(sg) !=
iommu_map->buf_start_addr);
dev_err(dev, "lazy map request differs:\n"
"req dir:%d, original dir:%d\n"
"req nents:%d, original nents:%d\n"
"req map attrs:%lu, original map attrs:%lu\n"
"req buffer start address differs:%d\n",
dir, iommu_map->dir, nents,
iommu_map->nents, attrs, iommu_map->attrs,
start_diff);
ret = -EINVAL;
}
}
mutex_unlock(&iommu_meta->lock);
return ret;
out_unlock:
mutex_unlock(&iommu_meta->lock);
out:
if (!IS_ERR(iommu_meta)) {
if (extra_meta_ref_taken)
msm_iommu_meta_put(iommu_meta);
msm_iommu_meta_put(iommu_meta);
}
return ret;
}
/*
* We are not taking a reference to the dma_buf here. It is expected that
* clients hold reference to the dma_buf until they are done with mapping and
* unmapping.
*/
int msm_dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
enum dma_data_direction dir, struct dma_buf *dma_buf,
unsigned long attrs)
{
int ret;
if (IS_ERR_OR_NULL(dev)) {
pr_err("%s: dev pointer is invalid\n", __func__);
return -EINVAL;
}
if (IS_ERR_OR_NULL(sg)) {
pr_err("%s: sg table pointer is invalid\n", __func__);
return -EINVAL;
}
if (IS_ERR_OR_NULL(dma_buf)) {
pr_err("%s: dma_buf pointer is invalid\n", __func__);
return -EINVAL;
}
ret = __msm_dma_map_sg(dev, sg, nents, dir, dma_buf, attrs);
return ret;
}
EXPORT_SYMBOL(msm_dma_map_sg_attrs);
static void msm_iommu_meta_destroy(struct kref *kref)
{
struct msm_iommu_meta *meta = container_of(kref, struct msm_iommu_meta,
ref);
if (!list_empty(&meta->iommu_maps)) {
WARN(1, "%s: DMA Buffer %p being destroyed with outstanding iommu mappings!\n",
__func__, meta->buffer);
}
rb_erase(&meta->node, &iommu_root);
kfree(meta);
}
static void msm_iommu_meta_put(struct msm_iommu_meta *meta)
{
/*
* Need to lock here to prevent race against map/unmap
*/
mutex_lock(&msm_iommu_map_mutex);
kref_put(&meta->ref, msm_iommu_meta_destroy);
mutex_unlock(&msm_iommu_map_mutex);
}
static void msm_iommu_map_release(struct kref *kref)
{
struct msm_iommu_map *map = container_of(kref, struct msm_iommu_map,
ref);
struct sg_table table;
table.nents = table.orig_nents = map->nents;
table.sgl = map->sgl;
list_del(&map->lnode);
/* Skip an additional cache maintenance on the dma unmap path */
if (!(map->attrs & DMA_ATTR_SKIP_CPU_SYNC))
map->attrs |= DMA_ATTR_SKIP_CPU_SYNC;
dma_unmap_sg_attrs(map->dev, map->sgl, map->nents, map->dir,
map->attrs);
sg_free_table(&table);
kfree(map);
}
void msm_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir,
struct dma_buf *dma_buf, unsigned long attrs)
{
struct msm_iommu_map *iommu_map;
struct msm_iommu_meta *meta;
mutex_lock(&msm_iommu_map_mutex);
meta = msm_iommu_meta_lookup(dma_buf->priv);
if (!meta) {
WARN(1, "%s: (%p) was never mapped\n", __func__, dma_buf);
mutex_unlock(&msm_iommu_map_mutex);
goto out;
}
mutex_unlock(&msm_iommu_map_mutex);
mutex_lock(&meta->lock);
iommu_map = msm_iommu_lookup(meta, dev);
if (!iommu_map) {
WARN(1, "%s: (%p) was never mapped for device %p\n", __func__,
dma_buf, dev);
mutex_unlock(&meta->lock);
goto out;
}
if (dir != iommu_map->dir)
WARN(1, "%s: (%pK) dir:%d differs from original dir:%d\n",
__func__, dma_buf, dir, iommu_map->dir);
if (attrs && ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0))
dma_sync_sg_for_cpu(dev, iommu_map->sgl, iommu_map->nents, dir);
iommu_map->attrs = attrs;
kref_put(&iommu_map->ref, msm_iommu_map_release);
mutex_unlock(&meta->lock);
msm_iommu_meta_put(meta);
out:
return;
}
EXPORT_SYMBOL(msm_dma_unmap_sg_attrs);
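/*
 * Illustrative only, not part of this patch: a hypothetical dma-buf importer
 * going through the lazy-mapping layer, so repeated maps of the same buffer
 * on the same device reuse the cached IOMMU mapping above.
 */
static int example_map_dmabuf(struct device *dev, struct dma_buf *dmabuf,
			      struct sg_table *sgt)
{
	int nents;

	nents = msm_dma_map_sg_attrs(dev, sgt->sgl, sgt->nents,
				     DMA_BIDIRECTIONAL, dmabuf, 0);
	if (nents <= 0)
		return -ENOMEM;

	/* ... issue DMA using sg_dma_address()/sg_dma_len() on sgt ... */

	msm_dma_unmap_sg_attrs(dev, sgt->sgl, sgt->nents,
			       DMA_BIDIRECTIONAL, dmabuf, 0);
	return 0;
}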
int msm_dma_unmap_all_for_dev(struct device *dev)
{
int ret = 0;
struct msm_iommu_meta *meta;
struct rb_root *root;
struct rb_node *meta_node;
mutex_lock(&msm_iommu_map_mutex);
root = &iommu_root;
meta_node = rb_first(root);
while (meta_node) {
struct msm_iommu_map *iommu_map;
struct msm_iommu_map *iommu_map_next;
meta = rb_entry(meta_node, struct msm_iommu_meta, node);
mutex_lock(&meta->lock);
list_for_each_entry_safe(iommu_map, iommu_map_next,
&meta->iommu_maps, lnode)
if (iommu_map->dev == dev)
if (!kref_put(&iommu_map->ref,
msm_iommu_map_release))
ret = -EINVAL;
mutex_unlock(&meta->lock);
meta_node = rb_next(meta_node);
}
mutex_unlock(&msm_iommu_map_mutex);
return ret;
}
/*
* Only to be called by ION code when a buffer is freed
*/
void msm_dma_buf_freed(void *buffer)
{
struct msm_iommu_map *iommu_map;
struct msm_iommu_map *iommu_map_next;
struct msm_iommu_meta *meta;
mutex_lock(&msm_iommu_map_mutex);
meta = msm_iommu_meta_lookup(buffer);
if (!meta) {
/* Already unmapped (assuming no late unmapping) */
mutex_unlock(&msm_iommu_map_mutex);
return;
}
mutex_unlock(&msm_iommu_map_mutex);
mutex_lock(&meta->lock);
list_for_each_entry_safe(iommu_map, iommu_map_next, &meta->iommu_maps,
lnode)
kref_put(&iommu_map->ref, msm_iommu_map_release);
if (!list_empty(&meta->iommu_maps)) {
WARN(1, "%s: DMA buffer %p destroyed with outstanding iommu mappings\n",
__func__, meta->buffer);
}
INIT_LIST_HEAD(&meta->iommu_maps);
mutex_unlock(&meta->lock);
msm_iommu_meta_put(meta);
}

View file

@ -3,7 +3,6 @@
# QCOM Soc drivers
#
menu "Qualcomm SoC drivers"
config QCOM_COMMAND_DB
bool "Qualcomm Command DB"
depends on ARCH_QCOM || COMPILE_TEST
@ -146,6 +145,24 @@ config QCOM_WCNSS_CTRL
Client driver for the WCNSS_CTRL SMD channel, used to download nv
firmware to a newly booted WCNSS chip.
config QCOM_SECURE_BUFFER
bool "Helper functions for securing buffers through TZ"
help
Say 'Y' here for targets that need to call into TZ to secure
memory buffers. This ensures that only the correct clients can
use this memory and no unauthorized access is made to the
buffer.
config MSM_TZ_SMMU
depends on ARCH_MSM8953 || ARCH_QCS405
bool "Helper functions for SMMU configuration through TZ"
help
Say 'Y' here for targets that need to call into TZ to configure
SMMUs for any reason (for example, for errata workarounds or
configuration of SMMU virtualization).
If unsure, say N.
config MSM_SERVICE_LOCATOR
bool "Service Locator"
depends on MSM_QMI_INTERFACE

View file

@ -16,6 +16,8 @@ obj-$(CONFIG_QCOM_SMP2P) += smp2p.o
obj-$(CONFIG_QCOM_SMSM) += smsm.o
obj-$(CONFIG_QCOM_WCNSS_CTRL) += wcnss_ctrl.o
obj-$(CONFIG_QCOM_APR) += apr.o
obj-$(CONFIG_QCOM_SECURE_BUFFER) += secure_buffer.o
obj-$(CONFIG_MSM_TZ_SMMU) += msm_tz_smmu.o
CFLAGS_scm.o :=$(call as-instr,.arch_extension sec,-DREQUIRES_SEC=1)
obj-$(CONFIG_QCOM_SCM) += scm.o
obj-$(CONFIG_MSM_BOOT_STATS) += boot_stats.o

View file

@ -0,0 +1,442 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2011 Google, Inc
* Copyright (c) 2011-2018, The Linux Foundation. All rights reserved.
*/
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <soc/qcom/scm.h>
#include <soc/qcom/secure_buffer.h>
DEFINE_MUTEX(secure_buffer_mutex);
struct cp2_mem_chunks {
u32 chunk_list;
u32 chunk_list_size;
u32 chunk_size;
} __attribute__ ((__packed__));
struct cp2_lock_req {
struct cp2_mem_chunks chunks;
u32 mem_usage;
u32 lock;
} __attribute__ ((__packed__));
struct mem_prot_info {
phys_addr_t addr;
u64 size;
};
#define MEM_PROT_ASSIGN_ID 0x16
#define MEM_PROTECT_LOCK_ID2 0x0A
#define MEM_PROTECT_LOCK_ID2_FLAT 0x11
#define V2_CHUNK_SIZE SZ_1M
#define FEATURE_ID_CP 12
struct dest_vm_and_perm_info {
u32 vm;
u32 perm;
u64 ctx;
u32 ctx_size;
};
static void *qcom_secure_mem;
#define QCOM_SECURE_MEM_SIZE (512*1024)
static int secure_buffer_change_chunk(u32 chunks,
u32 nchunks,
u32 chunk_size,
int lock)
{
struct cp2_lock_req request;
u32 resp;
int ret;
struct scm_desc desc = {0};
desc.args[0] = request.chunks.chunk_list = chunks;
desc.args[1] = request.chunks.chunk_list_size = nchunks;
desc.args[2] = request.chunks.chunk_size = chunk_size;
/* Usage is now always 0 */
desc.args[3] = request.mem_usage = 0;
desc.args[4] = request.lock = lock;
desc.args[5] = 0;
desc.arginfo = SCM_ARGS(6, SCM_RW, SCM_VAL, SCM_VAL, SCM_VAL, SCM_VAL,
SCM_VAL);
kmap_flush_unused();
kmap_atomic_flush_unused();
ret = scm_call2(SCM_SIP_FNID(SCM_SVC_MP,
MEM_PROTECT_LOCK_ID2_FLAT), &desc);
resp = desc.ret[0];
return ret;
}
static int secure_buffer_change_table(struct sg_table *table, int lock)
{
int i, j;
int ret = -EINVAL;
u32 *chunk_list;
struct scatterlist *sg;
for_each_sg(table->sgl, sg, table->nents, i) {
int nchunks;
int size = sg->length;
int chunk_list_len;
phys_addr_t chunk_list_phys;
/*
* This should theoretically be a phys_addr_t but the protocol
* indicates this should be a u32.
*/
u32 base;
u64 tmp = sg_dma_address(sg);
WARN((tmp >> 32) & 0xffffffff,
"%s: there are ones in the upper 32 bits of the sg at %pK! They will be truncated! Address: 0x%llx\n",
__func__, sg, tmp);
if (unlikely(!size || (size % V2_CHUNK_SIZE))) {
WARN(1,
"%s: chunk %d has invalid size: 0x%x. Must be a multiple of 0x%x\n",
__func__, i, size, V2_CHUNK_SIZE);
return -EINVAL;
}
base = (u32)tmp;
nchunks = size / V2_CHUNK_SIZE;
chunk_list_len = sizeof(u32)*nchunks;
chunk_list = kzalloc(chunk_list_len, GFP_KERNEL);
if (!chunk_list)
return -ENOMEM;
chunk_list_phys = virt_to_phys(chunk_list);
for (j = 0; j < nchunks; j++)
chunk_list[j] = base + j * V2_CHUNK_SIZE;
/*
* Flush the chunk list before sending the memory to the
* secure environment to ensure the data is actually present
* in RAM
*/
dmac_flush_range(chunk_list, chunk_list + chunk_list_len);
ret = secure_buffer_change_chunk(chunk_list_phys,
nchunks, V2_CHUNK_SIZE, lock);
if (!ret) {
/*
* Set or clear the private page flag to communicate the
* status of the chunk to other entities
*/
if (lock)
SetPagePrivate(sg_page(sg));
else
ClearPagePrivate(sg_page(sg));
}
kfree(chunk_list);
}
return ret;
}
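/*
 * Worked example (illustrative note, not part of the patch): a 4 MB
 * scatterlist entry at physical address P becomes the flat chunk list
 *   { P, P + 1M, P + 2M, P + 3M }
 * with nchunks = 4 and chunk_size = V2_CHUNK_SIZE; the list is flushed to
 * RAM and its physical address handed to the MEM_PROTECT_LOCK_ID2_FLAT
 * SCM call above.
 */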
int msm_secure_table(struct sg_table *table)
{
int ret;
mutex_lock(&secure_buffer_mutex);
ret = secure_buffer_change_table(table, 1);
mutex_unlock(&secure_buffer_mutex);
return ret;
}
int msm_unsecure_table(struct sg_table *table)
{
int ret;
mutex_lock(&secure_buffer_mutex);
ret = secure_buffer_change_table(table, 0);
mutex_unlock(&secure_buffer_mutex);
return ret;
}
static struct dest_vm_and_perm_info *
populate_dest_info(int *dest_vmids, int nelements, int *dest_perms,
size_t *size_in_bytes)
{
struct dest_vm_and_perm_info *dest_info;
int i;
size_t size;
/* Cap the allocation at one page so it stays well below PAGE_ALLOC_COSTLY_ORDER */
size = nelements * sizeof(*dest_info);
if (size > PAGE_SIZE)
return NULL;
dest_info = kzalloc(size, GFP_KERNEL);
if (!dest_info)
return NULL;
for (i = 0; i < nelements; i++) {
dest_info[i].vm = dest_vmids[i];
dest_info[i].perm = dest_perms[i];
dest_info[i].ctx = 0x0;
dest_info[i].ctx_size = 0;
}
*size_in_bytes = size;
return dest_info;
}
/* Must hold secure_buffer_mutex while allocated buffer is in use */
static struct mem_prot_info *get_info_list_from_table(struct sg_table *table,
size_t *size_in_bytes)
{
int i;
struct scatterlist *sg;
struct mem_prot_info *info;
size_t size;
size = table->nents * sizeof(*info);
if (size >= QCOM_SECURE_MEM_SIZE) {
pr_err("%s: Not enough memory allocated. Required size %zd\n",
__func__, size);
return NULL;
}
if (!qcom_secure_mem) {
pr_err("%s is not functional as qcom_secure_mem is not allocated.\n",
__func__);
return NULL;
}
/* "Allocate" it */
info = qcom_secure_mem;
for_each_sg(table->sgl, sg, table->nents, i) {
info[i].addr = page_to_phys(sg_page(sg));
info[i].size = sg->length;
}
*size_in_bytes = size;
return info;
}
#define BATCH_MAX_SIZE SZ_2M
#define BATCH_MAX_SECTIONS 32
int hyp_assign_table(struct sg_table *table,
u32 *source_vm_list, int source_nelems,
int *dest_vmids, int *dest_perms,
int dest_nelems)
{
int ret = 0;
struct scm_desc desc = {0};
u32 *source_vm_copy;
size_t source_vm_copy_size;
struct dest_vm_and_perm_info *dest_vm_copy;
size_t dest_vm_copy_size;
struct mem_prot_info *sg_table_copy;
size_t sg_table_copy_size;
int batch_start, batch_end;
u64 batch_size;
/*
* We can only pass cache-aligned sizes to hypervisor, so we need
* to kmalloc and memcpy the source_vm_list here.
*/
source_vm_copy_size = sizeof(*source_vm_copy) * source_nelems;
source_vm_copy = kzalloc(source_vm_copy_size, GFP_KERNEL);
if (!source_vm_copy)
return -ENOMEM;
memcpy(source_vm_copy, source_vm_list, source_vm_copy_size);
dest_vm_copy = populate_dest_info(dest_vmids, dest_nelems, dest_perms,
&dest_vm_copy_size);
if (!dest_vm_copy) {
ret = -ENOMEM;
goto out_free;
}
mutex_lock(&secure_buffer_mutex);
sg_table_copy = get_info_list_from_table(table, &sg_table_copy_size);
if (!sg_table_copy) {
ret = -ENOMEM;
goto out_unlock;
}
desc.args[0] = virt_to_phys(sg_table_copy);
desc.args[1] = sg_table_copy_size;
desc.args[2] = virt_to_phys(source_vm_copy);
desc.args[3] = source_vm_copy_size;
desc.args[4] = virt_to_phys(dest_vm_copy);
desc.args[5] = dest_vm_copy_size;
desc.args[6] = 0;
desc.arginfo = SCM_ARGS(7, SCM_RO, SCM_VAL, SCM_RO, SCM_VAL, SCM_RO,
SCM_VAL, SCM_VAL);
dmac_flush_range(source_vm_copy,
(void *)source_vm_copy + source_vm_copy_size);
dmac_flush_range(sg_table_copy,
(void *)sg_table_copy + sg_table_copy_size);
dmac_flush_range(dest_vm_copy,
(void *)dest_vm_copy + dest_vm_copy_size);
batch_start = 0;
while (batch_start < table->nents) {
/* Ensure no size zero batches */
batch_size = sg_table_copy[batch_start].size;
batch_end = batch_start + 1;
while (1) {
u64 size;
if (batch_end >= table->nents)
break;
if (batch_end - batch_start >= BATCH_MAX_SECTIONS)
break;
size = sg_table_copy[batch_end].size;
if (size + batch_size >= BATCH_MAX_SIZE)
break;
batch_size += size;
batch_end++;
}
desc.args[0] = virt_to_phys(&sg_table_copy[batch_start]);
desc.args[1] = (batch_end - batch_start) *
sizeof(sg_table_copy[0]);
ret = scm_call2(SCM_SIP_FNID(SCM_SVC_MP,
MEM_PROT_ASSIGN_ID), &desc);
if (ret) {
pr_info("%s: Failed to assign memory protection, ret = %d\n",
__func__, ret);
break;
}
batch_start = batch_end;
}
out_unlock:
mutex_unlock(&secure_buffer_mutex);
kfree(dest_vm_copy);
out_free:
kfree(source_vm_copy);
return ret;
}
int hyp_assign_phys(phys_addr_t addr, u64 size, u32 *source_vm_list,
int source_nelems, int *dest_vmids,
int *dest_perms, int dest_nelems)
{
struct sg_table table;
int ret;
ret = sg_alloc_table(&table, 1, GFP_KERNEL);
if (ret)
return ret;
sg_set_page(table.sgl, phys_to_page(addr), size, 0);
ret = hyp_assign_table(&table, source_vm_list, source_nelems,
dest_vmids, dest_perms, dest_nelems);
sg_free_table(&table);
return ret;
}
EXPORT_SYMBOL(hyp_assign_phys);
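/*
 * Editor's illustrative sketch (not part of this patch): a minimal,
 * hypothetical caller of hyp_assign_phys() that hands one physically
 * contiguous buffer from HLOS to the secure pixel VM with read/write
 * permission. The VMID and PERM_* constants are the ones introduced by
 * the qcom secure_buffer header in this series; the function name and
 * values are for illustration only.
 */
static int example_assign_to_cp_pixel(phys_addr_t paddr, u64 size)
{
	u32 source_vm[] = { VMID_HLOS };
	int dest_vm[] = { VMID_CP_PIXEL };
	int dest_perm[] = { PERM_READ | PERM_WRITE };

	return hyp_assign_phys(paddr, size, source_vm, ARRAY_SIZE(source_vm),
			       dest_vm, dest_perm, ARRAY_SIZE(dest_vm));
}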
const char *msm_secure_vmid_to_string(int secure_vmid)
{
switch (secure_vmid) {
case VMID_HLOS:
return "VMID_HLOS";
case VMID_CP_TOUCH:
return "VMID_CP_TOUCH";
case VMID_CP_BITSTREAM:
return "VMID_CP_BITSTREAM";
case VMID_CP_PIXEL:
return "VMID_CP_PIXEL";
case VMID_CP_NON_PIXEL:
return "VMID_CP_NON_PIXEL";
case VMID_CP_CAMERA:
return "VMID_CP_CAMERA";
case VMID_HLOS_FREE:
return "VMID_HLOS_FREE";
case VMID_MSS_MSA:
return "VMID_MSS_MSA";
case VMID_MSS_NONMSA:
return "VMID_MSS_NONMSA";
case VMID_CP_SEC_DISPLAY:
return "VMID_CP_SEC_DISPLAY";
case VMID_CP_APP:
return "VMID_CP_APP";
case VMID_WLAN:
return "VMID_WLAN";
case VMID_WLAN_CE:
return "VMID_WLAN_CE";
case VMID_CP_CAMERA_PREVIEW:
return "VMID_CP_CAMERA_PREVIEW";
case VMID_CP_SPSS_SP:
return "VMID_CP_SPSS_SP";
case VMID_CP_SPSS_SP_SHARED:
return "VMID_CP_SPSS_SP_SHARED";
case VMID_CP_SPSS_HLOS_SHARED:
return "VMID_CP_SPSS_HLOS_SHARED";
case VMID_INVAL:
return "VMID_INVAL";
default:
return "Unknown VMID";
}
}
#define MAKE_CP_VERSION(major, minor, patch) \
(((major & 0x3FF) << 22) | ((minor & 0x3FF) << 12) | (patch & 0xFFF))
bool msm_secure_v2_is_supported(void)
{
/*
* if the version is < 1.1.0 then dynamic buffer allocation is
* not supported
*/
return (scm_get_feat_version(FEATURE_ID_CP) >=
MAKE_CP_VERSION(1, 1, 0));
}
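/*
 * Editor's note: as a worked example of the packing above,
 * MAKE_CP_VERSION(1, 1, 0) == (1 << 22) | (1 << 12) | 0 == 0x00401000,
 * so msm_secure_v2_is_supported() returns true once the reported
 * FEATURE_ID_CP version is at least 0x00401000.
 */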
static int __init alloc_secure_shared_memory(void)
{
int ret = 0;
dma_addr_t dma_handle;
qcom_secure_mem = kzalloc(QCOM_SECURE_MEM_SIZE, GFP_KERNEL);
if (!qcom_secure_mem) {
/* Fallback to CMA-DMA memory */
qcom_secure_mem = dma_alloc_coherent(NULL, QCOM_SECURE_MEM_SIZE,
&dma_handle, GFP_KERNEL);
if (!qcom_secure_mem) {
pr_err("Couldn't allocate memory for secure use-cases. hyp_assign_table will not work\n");
return -ENOMEM;
}
}
return ret;
}
pure_initcall(alloc_secure_shared_memory);

View file

@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2018, The Linux Foundation. All rights reserved.
*/
#ifndef __ARM_SMMU_ERRATA_H__
#define __ARM_SMMU_ERRATA_H__
#define ARM_SMMU_MIN_IOVA_ALIGN SZ_16K
#define ARM_SMMU_GUARD_PROT (IOMMU_READ | IOMMU_WRITE | IOMMU_GUARD)
#ifdef CONFIG_ARM_SMMU
struct page *arm_smmu_errata_get_guard_page(int vmid);
#else
static inline struct page *arm_smmu_errata_get_guard_page(
int vmid)
{
return NULL;
}
#endif
#endif

View file

@ -0,0 +1,55 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
*/
#ifndef __LINUX_DMA_MAPPING_FAST_H
#define __LINUX_DMA_MAPPING_FAST_H
#include <linux/iommu.h>
#include <linux/io-pgtable-fast.h>
struct dma_iommu_mapping;
struct dma_fast_smmu_mapping {
struct device *dev;
struct iommu_domain *domain;
dma_addr_t base;
size_t size;
size_t num_4k_pages;
u32 min_iova_align;
struct page *guard_page;
unsigned int bitmap_size;
unsigned long *bitmap;
unsigned long next_start;
unsigned long upcoming_stale_bit;
bool have_stale_tlbs;
dma_addr_t pgtbl_dma_handle;
av8l_fast_iopte *pgtbl_pmds;
spinlock_t lock;
struct notifier_block notifier;
int is_smmu_pt_coherent;
};
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST
int fast_smmu_init_mapping(struct device *dev,
struct dma_iommu_mapping *mapping);
void fast_smmu_release_mapping(struct kref *kref);
#else
static inline int fast_smmu_init_mapping(struct device *dev,
struct dma_iommu_mapping *mapping)
{
return -ENODEV;
}
static inline void fast_smmu_release_mapping(struct kref *kref)
{
}
#endif
#endif /* __LINUX_DMA_MAPPING_FAST_H */
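/*
 * Editor's illustrative sketch (assumptions flagged inline): one plausible
 * way a client enables the fast-mapping path. arm_iommu_create_mapping()
 * and arm_iommu_attach_device() are the existing ARM DMA-IOMMU helpers
 * (header location varies by architecture); DOMAIN_ATTR_FAST is added by
 * this patch. The IOVA base/size below are placeholders, and the function
 * name is hypothetical.
 */
static int example_enable_fastmap(struct device *dev)
{
	struct dma_iommu_mapping *mapping;
	int fast = 1;

	mapping = arm_iommu_create_mapping(&platform_bus_type,
					   0x10000000, SZ_1G);
	if (IS_ERR(mapping))
		return PTR_ERR(mapping);

	iommu_domain_set_attr(mapping->domain, DOMAIN_ATTR_FAST, &fast);

	return arm_iommu_attach_device(dev, mapping);
}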

View file

@ -62,13 +62,64 @@
* allocation failure reports (similarly to __GFP_NOWARN).
*/
#define DMA_ATTR_NO_WARN (1UL << 8)
/*
* DMA_ATTR_STRONGLY_ORDERED: Specifies that accesses to the mapping must
* not be buffered, reordered, merged with other accesses, or unaligned.
* No speculative access may occur in this mapping.
*/
#define DMA_ATTR_STRONGLY_ORDERED (1UL << 9)
/*
* DMA_ATTR_SKIP_ZEROING: Do not zero mapping.
*/
#define DMA_ATTR_SKIP_ZEROING (1UL << 10)
/*
* DMA_ATTR_NO_DELAYED_UNMAP: Used by the MSM-specific lazy mapping to indicate
* that the mapping can be freed on unmap, rather than when the ion_buffer
* is freed.
*/
#define DMA_ATTR_NO_DELAYED_UNMAP (1UL << 11)
/*
* DMA_ATTR_EXEC_MAPPING: The mapping has executable permissions.
*/
#define DMA_ATTR_EXEC_MAPPING (1UL << 12)
/*
* DMA_ATTR_IOMMU_USE_UPSTREAM_HINT: Normally an SMMU will override any bus
* attributes (i.e. cacheability) provided by the client device. Some hardware
* may be designed to use the original attributes instead.
*/
#define DMA_ATTR_IOMMU_USE_UPSTREAM_HINT (1UL << 13)
/*
* When passed to a DMA map call, the DMA_ATTR_FORCE_COHERENT DMA
* attribute can be used to force a buffer to be mapped as IO coherent.
*/
#define DMA_ATTR_FORCE_COHERENT (1UL << 14)
/*
* When passed to a DMA map call, the DMA_ATTR_FORCE_NON_COHERENT DMA
* attribute can be used to force a buffer to not be mapped as IO
* coherent.
*/
#define DMA_ATTR_FORCE_NON_COHERENT (1UL << 15)
/*
* DMA_ATTR_DELAYED_UNMAP: Used by ION, it will ensure that mappings are not
* removed on unmap but instead are removed when the ion_buffer is freed.
*/
#define DMA_ATTR_DELAYED_UNMAP (1UL << 16)
/*
* DMA_ATTR_PRIVILEGED: used to indicate that the buffer is fully
* accessible at an elevated privilege level (and ideally inaccessible or
* at least read-only at lesser-privileged levels).
*/
#define DMA_ATTR_PRIVILEGED (1UL << 9)
#define DMA_ATTR_PRIVILEGED (1UL << 17)
/*
* DMA_ATTR_IOMMU_USE_LLC_NWA: Overrides the bus attributes to use the System
* Cache (LLC) with an allocation policy of Inner Non-Cacheable, Outer Cacheable:
* Write-Back, Read-Allocate, No Write-Allocate.
*/
#define DMA_ATTR_IOMMU_USE_LLC_NWA (1UL << 18)
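/*
 * Editor's illustrative sketch: how a client might combine the attributes
 * above in a dma_alloc_attrs() call. Whether a given attribute is honoured
 * depends on the dma_map_ops backing the device; the function name is
 * hypothetical.
 */
static void *example_alloc_with_attrs(struct device *dev, size_t size,
				      dma_addr_t *handle)
{
	unsigned long attrs = DMA_ATTR_STRONGLY_ORDERED |
			      DMA_ATTR_SKIP_ZEROING;

	return dma_alloc_attrs(dev, size, handle, GFP_KERNEL, attrs);
}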
#define DMA_ERROR_CODE (~(dma_addr_t)0)
/*
* A dma_addr_t can hold any valid DMA or bus address for the platform.
@ -130,6 +181,11 @@ struct dma_map_ops {
enum dma_data_direction direction);
int (*mapping_error)(struct device *dev, dma_addr_t dma_addr);
int (*dma_supported)(struct device *dev, u64 mask);
int (*set_dma_mask)(struct device *dev, u64 mask);
void *(*remap)(struct device *dev, void *cpu_addr, dma_addr_t handle,
size_t size, unsigned long attrs);
void (*unremap)(struct device *dev, void *remapped_address,
size_t size);
#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
u64 (*get_required_mask)(struct device *dev);
#endif
@ -454,7 +510,8 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
void *dma_common_pages_remap(struct page **pages, size_t size,
unsigned long vm_flags, pgprot_t prot,
const void *caller);
void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags);
void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags,
bool nowarn);
/**
* dma_mmap_attrs - map a coherent DMA allocation into user space
@ -592,6 +649,11 @@ static inline int dma_supported(struct device *dev, u64 mask)
#ifndef HAVE_ARCH_DMA_SET_MASK
static inline int dma_set_mask(struct device *dev, u64 mask)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (ops->set_dma_mask)
return ops->set_dma_mask(dev, mask);
if (!dev->dma_mask || !dma_supported(dev, mask))
return -EIO;
@ -601,6 +663,35 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
return 0;
}
#endif
static inline void *dma_remap(struct device *dev, void *cpu_addr,
dma_addr_t dma_handle, size_t size, unsigned long attrs)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (!ops->remap) {
WARN_ONCE(1, "Remap function not implemented for %pS\n",
ops->remap);
return NULL;
}
return ops->remap(dev, cpu_addr, dma_handle, size, attrs);
}
static inline void dma_unremap(struct device *dev, void *remapped_addr,
size_t size)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
if (!ops->unremap) {
WARN_ONCE(1, "unremap function not implemented for %pS\n",
ops->unremap);
return;
}
return ops->unremap(dev, remapped_addr, size);
}
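/*
 * Editor's illustrative sketch: remap a previously allocated coherent
 * buffer through the new remap/unremap hooks, e.g. with different
 * attributes, then tear the mapping down again. This only works where the
 * device's dma_map_ops implement the hooks added above; the function name
 * is hypothetical.
 */
static void example_remap_buffer(struct device *dev, void *cpu_addr,
				 dma_addr_t handle, size_t size)
{
	void *va = dma_remap(dev, cpu_addr, handle, size,
			     DMA_ATTR_EXEC_MAPPING);

	if (!va)
		return;
	/* ... access the buffer through va ... */
	dma_unremap(dev, va, size);
}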
static inline u64 dma_get_mask(struct device *dev)
{

View file

@ -0,0 +1,50 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2018, The Linux Foundation. All rights reserved.
*/
#ifndef __LINUX_IO_PGTABLE_FAST_H
#define __LINUX_IO_PGTABLE_FAST_H
#include <linux/notifier.h>
typedef u64 av8l_fast_iopte;
#define iopte_pmd_offset(pmds, iova) (pmds + (iova >> 12))
int av8l_fast_map_public(av8l_fast_iopte *ptep, phys_addr_t paddr, size_t size,
int prot);
void av8l_fast_unmap_public(av8l_fast_iopte *ptep, size_t size);
/* events for notifiers passed to av8l_register_notify */
#define MAPPED_OVER_STALE_TLB 1
#ifdef CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB
/*
* Doesn't matter what we use as long as bit 0 is unset. The reason why we
* need a different value at all is that there are certain hardware
* platforms with an erratum that requires that a PTE actually be zeroed out
* and not just have its valid bit unset.
*/
#define AV8L_FAST_PTE_UNMAPPED_NEED_TLBI 0xa
void av8l_fast_clear_stale_ptes(av8l_fast_iopte *puds, bool skip_sync);
void av8l_register_notify(struct notifier_block *nb);
#else /* !CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB */
#define AV8L_FAST_PTE_UNMAPPED_NEED_TLBI 0
static inline void av8l_fast_clear_stale_ptes(av8l_fast_iopte *puds,
bool skip_sync)
{
}
static inline void av8l_register_notify(struct notifier_block *nb)
{
}
#endif /* CONFIG_IOMMU_IO_PGTABLE_FAST_PROVE_TLB */
#endif /* __LINUX_IO_PGTABLE_FAST_H */
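/*
 * Editor's illustrative sketch: map one 4K page at 'iova' through the
 * fast page-table helpers declared above. iopte_pmd_offset() encodes the
 * 4K-granule assumption (iova >> 12 indexes the PTE array); the IOMMU_*
 * protection flags come from <linux/iommu.h>. The function name is
 * hypothetical.
 */
static int example_fast_map_page(av8l_fast_iopte *pmds, unsigned long iova,
				 phys_addr_t paddr)
{
	av8l_fast_iopte *ptep = iopte_pmd_offset(pmds, iova);

	return av8l_fast_map_public(ptep, paddr, SZ_4K,
				    IOMMU_READ | IOMMU_WRITE);
}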

View file

@ -41,6 +41,12 @@
* if the IOMMU page table format is equivalent.
*/
#define IOMMU_PRIV (1 << 5)
#define IOMMU_GUARD (1 << 28) /* Guard Page */
/* Use upstream device's bus attribute */
#define IOMMU_USE_UPSTREAM_HINT (1 << 6)
/* Use upstream device's bus attribute with no write-allocate cache policy */
#define IOMMU_USE_LLC_NWA (1 << 7)
struct iommu_ops;
struct iommu_group;
@ -50,8 +56,12 @@ struct iommu_domain;
struct notifier_block;
/* iommu fault flags */
#define IOMMU_FAULT_READ 0x0
#define IOMMU_FAULT_WRITE 0x1
#define IOMMU_FAULT_READ (1 << 0)
#define IOMMU_FAULT_WRITE (1 << 1)
#define IOMMU_FAULT_TRANSLATION (1 << 2)
#define IOMMU_FAULT_PERMISSION (1 << 3)
#define IOMMU_FAULT_EXTERNAL (1 << 4)
#define IOMMU_FAULT_TRANSACTION_STALLED (1 << 5)
typedef int (*iommu_fault_handler_t)(struct iommu_domain *,
struct device *, unsigned long, int, void *);
@ -62,6 +72,10 @@ struct iommu_domain_geometry {
bool force_aperture; /* DMA only allowed in mappable range? */
};
struct iommu_pgtbl_info {
void *pmds;
};
/* Domain feature flags */
#define __IOMMU_DOMAIN_PAGING (1U << 0) /* Support for iommu_map/unmap */
#define __IOMMU_DOMAIN_DMA_API (1U << 1) /* Domain for use in DMA-API
@ -94,6 +108,7 @@ struct iommu_domain {
void *handler_token;
struct iommu_domain_geometry geometry;
void *iova_cookie;
bool is_debug_domain;
};
enum iommu_cap {
@ -124,6 +139,26 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMU_ENABLE,
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING, /* two stages of translation */
DOMAIN_ATTR_PT_BASE_ADDR,
DOMAIN_ATTR_CONTEXT_BANK,
DOMAIN_ATTR_DYNAMIC,
DOMAIN_ATTR_TTBR0,
DOMAIN_ATTR_CONTEXTIDR,
DOMAIN_ATTR_PROCID,
DOMAIN_ATTR_NON_FATAL_FAULTS,
DOMAIN_ATTR_S1_BYPASS,
DOMAIN_ATTR_ATOMIC,
DOMAIN_ATTR_SECURE_VMID,
DOMAIN_ATTR_FAST,
DOMAIN_ATTR_PGTBL_INFO,
DOMAIN_ATTR_USE_UPSTREAM_HINT,
DOMAIN_ATTR_EARLY_MAP,
DOMAIN_ATTR_PAGE_TABLE_IS_COHERENT,
DOMAIN_ATTR_PAGE_TABLE_FORCE_COHERENT,
DOMAIN_ATTR_CB_STALL_DISABLE,
DOMAIN_ATTR_BITMAP_IOVA_ALLOCATOR,
DOMAIN_ATTR_QCOM_MMU500_ERRATA_MIN_IOVA_ALIGN,
DOMAIN_ATTR_USE_LLC_NWA,
DOMAIN_ATTR_MAX,
};
@ -155,6 +190,8 @@ struct iommu_resv_region {
enum iommu_resv_type type;
};
extern struct dentry *iommu_debugfs_top;
#ifdef CONFIG_IOMMU_API
/**
@ -173,6 +210,7 @@ struct iommu_resv_region {
* @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush
* queue
* @iova_to_phys: translate iova to physical address
* @iova_to_phys_hard: translate iova to physical address using IOMMU hardware
* @add_device: add device to iommu grouping
* @remove_device: remove device from iommu grouping
* @device_group: find iommu group for a particular device
@ -187,6 +225,12 @@ struct iommu_resv_region {
* @domain_get_windows: Return the number of windows for a domain
* @of_xlate: add OF master IDs to iommu grouping
* @pgsize_bitmap: bitmap of all possible supported page sizes
* @trigger_fault: trigger a fault on the device attached to an iommu domain
* @reg_read: read an IOMMU register
* @reg_write: write an IOMMU register
* @tlbi_domain: Invalidate all TLBs covering an iommu domain
* @enable_config_clocks: Enable all config clocks for this domain's IOMMU
* @disable_config_clocks: Disable all config clocks for this domain's IOMMU
*/
struct iommu_ops {
bool (*capable)(enum iommu_cap);
@ -208,6 +252,8 @@ struct iommu_ops {
unsigned long iova, size_t size);
void (*iotlb_sync)(struct iommu_domain *domain);
phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova);
phys_addr_t (*iova_to_phys_hard)(struct iommu_domain *domain,
dma_addr_t iova);
int (*add_device)(struct device *dev);
void (*remove_device)(struct device *dev);
struct iommu_group *(*device_group)(struct device *dev);
@ -231,10 +277,21 @@ struct iommu_ops {
int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
/* Get the number of windows per domain */
u32 (*domain_get_windows)(struct iommu_domain *domain);
void (*trigger_fault)(struct iommu_domain *domain, unsigned long flags);
unsigned long (*reg_read)(struct iommu_domain *domain,
unsigned long offset);
void (*reg_write)(struct iommu_domain *domain, unsigned long val,
unsigned long offset);
void (*tlbi_domain)(struct iommu_domain *domain);
int (*enable_config_clocks)(struct iommu_domain *domain);
void (*disable_config_clocks)(struct iommu_domain *domain);
uint64_t (*iova_to_pte)(struct iommu_domain *domain,
dma_addr_t iova);
int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
bool (*is_iova_coherent)(struct iommu_domain *domain, dma_addr_t iova);
unsigned long pgsize_bitmap;
};
@ -297,6 +354,8 @@ extern int iommu_attach_device(struct iommu_domain *domain,
extern void iommu_detach_device(struct iommu_domain *domain,
struct device *dev);
extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
extern size_t iommu_pgsize(unsigned long pgsize_bitmap,
unsigned long addr_merge, size_t size);
extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot);
extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
@ -307,6 +366,10 @@ extern size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long io
struct scatterlist *sg,unsigned int nents,
int prot);
extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova);
extern phys_addr_t iommu_iova_to_phys_hard(struct iommu_domain *domain,
dma_addr_t iova);
extern bool iommu_is_iova_coherent(struct iommu_domain *domain,
dma_addr_t iova);
extern void iommu_set_fault_handler(struct iommu_domain *domain,
iommu_fault_handler_t handler, void *token);
@ -356,6 +419,9 @@ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
int prot);
extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr);
extern uint64_t iommu_iova_to_pte(struct iommu_domain *domain,
dma_addr_t iova);
extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev,
unsigned long iova, int flags);
@ -385,11 +451,37 @@ static inline size_t iommu_map_sg(struct iommu_domain *domain,
return domain->ops->map_sg(domain, iova, sg, nents, prot);
}
extern void iommu_trigger_fault(struct iommu_domain *domain,
unsigned long flags);
extern unsigned long iommu_reg_read(struct iommu_domain *domain,
unsigned long offset);
extern void iommu_reg_write(struct iommu_domain *domain, unsigned long offset,
unsigned long val);
/* PCI device grouping function */
extern struct iommu_group *pci_device_group(struct device *dev);
/* Generic device grouping function */
extern struct iommu_group *generic_device_group(struct device *dev);
static inline void iommu_tlbiall(struct iommu_domain *domain)
{
if (domain->ops->tlbi_domain)
domain->ops->tlbi_domain(domain);
}
static inline int iommu_enable_config_clocks(struct iommu_domain *domain)
{
if (domain->ops->enable_config_clocks)
return domain->ops->enable_config_clocks(domain);
return 0;
}
static inline void iommu_disable_config_clocks(struct iommu_domain *domain)
{
if (domain->ops->disable_config_clocks)
domain->ops->disable_config_clocks(domain);
}
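/*
 * Editor's illustrative sketch: the new helpers above are intended for
 * debug paths. The register offset here is a placeholder; real offsets
 * depend on the SMMU implementation, and the function name is
 * hypothetical.
 */
static void example_dump_and_invalidate(struct iommu_domain *domain)
{
	unsigned long val;

	if (iommu_enable_config_clocks(domain))
		return;

	val = iommu_reg_read(domain, 0x0 /* placeholder offset */);
	pr_debug("iommu reg[0x0] = 0x%lx\n", val);

	iommu_tlbiall(domain);
	iommu_disable_config_clocks(domain);
}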
/**
* struct iommu_fwspec - per-device IOMMU instance data
* @ops: ops for this device's IOMMU
@ -411,6 +503,8 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
void iommu_fwspec_free(struct device *dev);
int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids);
const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode);
int iommu_fwspec_get_id(struct device *dev, u32 *id);
int iommu_is_available(struct device *dev);
#else /* CONFIG_IOMMU_API */
@ -514,6 +608,18 @@ static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_ad
return 0;
}
static inline phys_addr_t iommu_iova_to_phys_hard(struct iommu_domain *domain,
dma_addr_t iova)
{
return 0;
}
static inline bool iommu_is_iova_coherent(struct iommu_domain *domain,
dma_addr_t iova)
{
return 0;
}
static inline void iommu_set_fault_handler(struct iommu_domain *domain,
iommu_fault_handler_t handler, void *token)
{
@ -673,6 +779,35 @@ static inline void iommu_device_unlink(struct device *dev, struct device *link)
{
}
static inline void iommu_trigger_fault(struct iommu_domain *domain,
unsigned long flags)
{
}
static inline unsigned long iommu_reg_read(struct iommu_domain *domain,
unsigned long offset)
{
return 0;
}
static inline void iommu_reg_write(struct iommu_domain *domain,
unsigned long val, unsigned long offset)
{
}
static inline void iommu_tlbiall(struct iommu_domain *domain)
{
}
static inline int iommu_enable_config_clocks(struct iommu_domain *domain)
{
return 0;
}
static inline void iommu_disable_config_clocks(struct iommu_domain *domain)
{
}
static inline int iommu_fwspec_init(struct device *dev,
struct fwnode_handle *iommu_fwnode,
const struct iommu_ops *ops)
@ -696,6 +831,15 @@ const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode)
return NULL;
}
static inline int iommu_fwspec_get_id(struct device *dev, u32 *id)
{
return -ENODEV;
}
static inline int iommu_is_available(struct device *dev)
{
return -ENODEV;
}
#endif /* CONFIG_IOMMU_API */
#endif /* __LINUX_IOMMU_H */

View file

@ -0,0 +1,109 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2015-2016, 2018 The Linux Foundation. All rights reserved.
*/
#ifndef _LINUX_MSM_DMA_IOMMU_MAPPING_H
#define _LINUX_MSM_DMA_IOMMU_MAPPING_H
#include <linux/device.h>
#include <linux/dma-buf.h>
#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>
#ifdef CONFIG_QCOM_LAZY_MAPPING
/*
* This function does not take a reference to the dma_buf. Clients are
* expected to hold a reference to the dma_buf until they are done with
* mapping and unmapping.
*/
int msm_dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents,
enum dma_data_direction dir, struct dma_buf *dma_buf,
unsigned long attrs);
/*
* This function takes an extra reference to the dma_buf.
* This means that calling msm_dma_unmap_sg will not remove the buffer's
* iommu mapping, so subsequent calls to lazy map will simply re-use the
* existing iommu mapping.
* The iommu unmapping of the buffer will occur when the ION buffer is
* destroyed.
* Using lazy mapping can provide a performance benefit because subsequent
* mappings are faster.
*
* The limitations of this API are that all subsequent iommu mappings
* must be the same as the original mapping, i.e. they must map the same part
* of the buffer with the same dma data direction. Also there can't be multiple
* mappings of different parts of the buffer.
*/
static inline int msm_dma_map_sg_lazy(struct device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction dir,
struct dma_buf *dma_buf)
{
return msm_dma_map_sg_attrs(dev, sg, nents, dir, dma_buf, 0);
}
static inline int msm_dma_map_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
struct dma_buf *dma_buf)
{
unsigned long attrs;
attrs = DMA_ATTR_NO_DELAYED_UNMAP;
return msm_dma_map_sg_attrs(dev, sg, nents, dir, dma_buf, attrs);
}
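/*
 * Editor's illustrative sketch: lazily map a dma-buf's sg list (the iommu
 * mapping then persists until the ION buffer is freed) and balance it with
 * msm_dma_unmap_sg_attrs(). In a real client 'sgt' would come from
 * dma_buf_map_attachment(); the function name is hypothetical.
 */
static int example_lazy_map(struct device *dev, struct sg_table *sgt,
			    struct dma_buf *dmabuf)
{
	int nents = msm_dma_map_sg_lazy(dev, sgt->sgl, sgt->nents,
					DMA_BIDIRECTIONAL, dmabuf);

	if (nents <= 0)
		return -EINVAL;

	/* ... perform DMA ... */

	msm_dma_unmap_sg_attrs(dev, sgt->sgl, sgt->nents, DMA_BIDIRECTIONAL,
			       dmabuf, 0);
	return 0;
}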
void msm_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir,
struct dma_buf *dma_buf, unsigned long attrs);
int msm_dma_unmap_all_for_dev(struct device *dev);
/*
* Below is private function only to be called by framework (ION) and not by
* clients.
*/
void msm_dma_buf_freed(void *buffer);
#else /*CONFIG_QCOM_LAZY_MAPPING*/
static inline int msm_dma_map_sg_attrs(struct device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction dir, struct dma_buf *dma_buf,
unsigned long attrs)
{
return -EINVAL;
}
static inline int msm_dma_map_sg_lazy(struct device *dev,
struct scatterlist *sg, int nents,
enum dma_data_direction dir,
struct dma_buf *dma_buf)
{
return -EINVAL;
}
static inline int msm_dma_map_sg(struct device *dev, struct scatterlist *sg,
int nents, enum dma_data_direction dir,
struct dma_buf *dma_buf)
{
return -EINVAL;
}
static inline void msm_dma_unmap_sg(struct device *dev,
struct scatterlist *sgl, int nents,
enum dma_data_direction dir,
struct dma_buf *dma_buf)
{
}
static inline int msm_dma_unmap_all_for_dev(struct device *dev)
{
return 0;
}
static inline void msm_dma_buf_freed(void *buffer) {}
#endif /*CONFIG_QCOM_LAZY_MAPPING*/
#endif

View file

@ -34,6 +34,6 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
extern struct of_device_id __iommu_of_table;
#define IOMMU_OF_DECLARE(name, compat) OF_DECLARE_1(iommu, name, compat, NULL)
#endif /* __OF_IOMMU_H */

View file

@ -113,6 +113,8 @@ extern void oom_killer_enable(void);
extern struct task_struct *find_lock_task_mm(struct task_struct *p);
extern void dump_tasks(struct mem_cgroup *memcg,
const nodemask_t *nodemask);
/* sysctls */
extern int sysctl_oom_dump_tasks;
extern int sysctl_oom_kill_allocating_task;

View file

@ -0,0 +1,91 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2015-2018, The Linux Foundation. All rights reserved.
*/
#ifndef __QCOM_SECURE_BUFFER_H__
#define __QCOM_SECURE_BUFFER_H__
#include <linux/scatterlist.h>
/*
* If you add a secure VMID here, make sure you update
* msm_secure_vmid_to_string.
* Make sure to keep VMID_LAST as the last entry in the enum.
* This is needed in ION to create a list, and it is sized using VMID_LAST.
*/
enum vmid {
VMID_HLOS = 0x3,
VMID_CP_TOUCH = 0x8,
VMID_CP_BITSTREAM = 0x9,
VMID_CP_PIXEL = 0xA,
VMID_CP_NON_PIXEL = 0xB,
VMID_CP_CAMERA = 0xD,
VMID_HLOS_FREE = 0xE,
VMID_MSS_MSA = 0xF,
VMID_MSS_NONMSA = 0x10,
VMID_CP_SEC_DISPLAY = 0x11,
VMID_CP_APP = 0x12,
VMID_WLAN = 0x18,
VMID_WLAN_CE = 0x19,
VMID_CP_SPSS_SP = 0x1A,
VMID_CP_CAMERA_PREVIEW = 0x1D,
VMID_CP_SPSS_SP_SHARED = 0x22,
VMID_CP_SPSS_HLOS_SHARED = 0x24,
VMID_LAST,
VMID_INVAL = -1
};
#define PERM_READ 0x4
#define PERM_WRITE 0x2
#define PERM_EXEC 0x1
#ifdef CONFIG_QCOM_SECURE_BUFFER
int msm_secure_table(struct sg_table *table);
int msm_unsecure_table(struct sg_table *table);
int hyp_assign_table(struct sg_table *table,
u32 *source_vm_list, int source_nelems,
int *dest_vmids, int *dest_perms,
int dest_nelems);
extern int hyp_assign_phys(phys_addr_t addr, u64 size,
u32 *source_vmlist, int source_nelems,
int *dest_vmids, int *dest_perms, int dest_nelems);
bool msm_secure_v2_is_supported(void);
const char *msm_secure_vmid_to_string(int secure_vmid);
#else
static inline int msm_secure_table(struct sg_table *table)
{
return -EINVAL;
}
static inline int msm_unsecure_table(struct sg_table *table)
{
return -EINVAL;
}
static inline int hyp_assign_table(struct sg_table *table,
u32 *source_vm_list, int source_nelems,
int *dest_vmids, int *dest_perms,
int dest_nelems)
{
return -EINVAL;
}
static inline int hyp_assign_phys(phys_addr_t addr, u64 size,
u32 *source_vmlist, int source_nelems,
int *dest_vmids, int *dest_perms, int dest_nelems)
{
return -EINVAL;
}
static inline bool msm_secure_v2_is_supported(void)
{
return false;
}
static inline const char *msm_secure_vmid_to_string(int secure_vmid)
{
return "N/A";
}
#endif
#endif

View file

@ -291,11 +291,12 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
unsigned long vm_flags,
pgprot_t prot, const void *caller)
{
int i;
unsigned long i;
struct page **pages;
struct vm_struct *area;
pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
pages = kvmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
if (!pages)
return NULL;
@ -304,7 +305,7 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
kfree(pages);
kvfree(pages);
if (!area)
return NULL;
@ -314,12 +315,14 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
/*
* unmaps a range previously mapped by dma_common_*_remap
*/
void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags,
bool no_warn)
{
struct vm_struct *area = find_vm_area(cpu_addr);
if (!area || (area->flags & vm_flags) != vm_flags) {
WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
WARN(!no_warn, "trying to free invalid coherent area: %pK\n",
cpu_addr);
return;
}

View file

@ -387,7 +387,7 @@ static void select_bad_process(struct oom_control *oc)
* State information includes task's pid, uid, tgid, vm size, rss,
* pgtables_bytes, swapents, oom_score_adj value, and name.
*/
static void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
struct task_struct *p;
struct task_struct *task;