DataWeave is MuleSoft's powerful transformation language. This comprehensive guide covers everything from basic transformations to advanced techniques that solve real-world integration challenges.
DataWeave Fundamentals
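Every DataWeave script has two parts: a header of directives (version, output format, variables, functions) and a body expression that produces the output. A line containing only `---` separates them: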
%dw 2.0
output application/json
// Basic transformation structure:
//   header (above the `---` line): version and output format directives
//   body   (below the `---` line): the expression that produces the output
---
{
    // Transformation logic goes here
}
Data Types and Coercion
%dw 2.0
output application/json
// Sample record: every value arrives as a string (or null) and is coerced below
var inputData = {
    "stringNum": "123",
    "dateString": "2025-01-15",
    "boolString": "true",
    "nullValue": null
}
---
{
    // Explicit coercions with `as`
    asNumber: inputData.stringNum as Number,
    asDate: inputData.dateString as Date {format: "yyyy-MM-dd"},
    asBoolean: inputData.boolString as Boolean,

    // `default` substitutes a fallback when the left side is null
    withDefault: inputData.nullValue default "N/A",

    // Coerce only when the value really is a String; otherwise fall back to 0
    safeNumber:
        if (!(inputData.stringNum is String))
            0
        else
            inputData.stringNum as Number
}
JSON to XML Transformations
Convert JSON payloads to XML with namespaces:
%dw 2.0
output application/xml
ns soap http://schemas.xmlsoap.org/soap/envelope/
ns ord http://example.com/orders
// Source order, declared inline so the example is self-contained
var orderJson = {
    "orderId": "ORD-12345",
    "customer": {
        "id": "CUST-001",
        "name": "Acme Corp",
        "email": "orders@acme.com"
    },
    "items": [
        {"sku": "PROD-A", "quantity": 10, "price": 25.99},
        {"sku": "PROD-B", "quantity": 5, "price": 49.99}
    ],
    "total": 509.85
}
var buyer = orderJson.customer
// Builds one ord:OrderLine element; `position` is the zero-based index of the item
fun toOrderLine(item, position) = {
    ord#OrderLine: {
        ord#LineNumber: position + 1,
        ord#SKU: item.sku,
        ord#Quantity: item.quantity,
        ord#UnitPrice: item.price,
        ord#LineTotal: item.quantity * item.price
    }
}
---
{
    soap#Envelope: {
        soap#Header: {},
        soap#Body: {
            ord#CreateOrder: {
                ord#OrderHeader: {
                    ord#OrderId: orderJson.orderId,
                    ord#OrderDate: now() as String {format: "yyyy-MM-dd'T'HH:mm:ss"},
                    ord#Customer: {
                        ord#CustomerId: buyer.id,
                        ord#CustomerName: buyer.name,
                        ord#Email: buyer.email
                    }
                },
                ord#OrderLines: {
                    // Parentheses splice the mapped objects in as repeated child elements
                    (orderJson.items map ((item, position) -> toOrderLine(item, position)))
                },
                ord#OrderTotal: orderJson.total
            }
        }
    }
}
XML to JSON Transformations
Handle complex XML with attributes and namespaces:
%dw 2.0
output application/json
// Sample SOAP response, parsed from an inline string so the example is self-contained
var xmlInput = read('<?xml version="1.0"?>
<soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<GetCustomerResponse xmlns="http://example.com/customer">
<Customer id="CUST-001" status="active">
<Name>Acme Corporation</Name>
<Addresses>
<Address type="billing" primary="true">
<Street>123 Main St</Street>
<City>New York</City>
<Country>USA</Country>
</Address>
<Address type="shipping">
<Street>456 Warehouse Blvd</Street>
<City>Newark</City>
<Country>USA</Country>
</Address>
</Addresses>
<Contacts>
<Contact role="primary">John Doe</Contact>
<Contact role="billing">Jane Smith</Contact>
</Contacts>
</Customer>
</GetCustomerResponse>
</soap:Body>
</soap:Envelope>', "application/xml")
// Resolve the deep selector path once instead of repeating it for every field
var customer = xmlInput.Envelope.Body.GetCustomerResponse.Customer
---
{
    customer: {
        // Access attributes with @
        id: customer.@id,
        status: customer.@status,
        name: customer.Name,
        // `*Address` selects every repeated Address element as an array
        addresses: customer.Addresses.*Address map ((addr) -> {
            addressType: addr.@type,
            // Parentheses are required: `==` binds tighter than `default`, so the
            // unparenthesised form evaluates `"false" == "true"` first and then
            // returns the raw attribute string whenever the attribute is present.
            isPrimary: (addr.@primary default "false") == "true",
            street: addr.Street,
            city: addr.City,
            country: addr.Country
        }),
        // Each contact's element text becomes the name, its role attribute the role
        contacts: customer.Contacts.*Contact map ((contact) -> {
            role: contact.@role,
            name: contact
        })
    }
}
CSV and Flat File Processing
Handle CSV with custom delimiters and headers:
%dw 2.0
output application/json
// Pipe-delimited sample data with a header row
var csvData = "employee_id|first_name|last_name|department|salary|hire_date
E001|John|Doe|Engineering|85000|2020-03-15
E002|Jane|Smith|Marketing|72000|2019-08-22
E003|Bob|Johnson|Engineering|92000|2018-01-10
E004|Alice|Williams|Sales|78000|2021-05-01"
var parsedCsv = read(csvData, "application/csv", {
    separator: "|",
    header: true
})
// "First Last" display name for one CSV row
fun fullName(row) = row.first_name ++ " " ++ row.last_name
// Salary column coerced from its CSV string form to a Number
fun salaryOf(row) = row.salary as Number
---
{
    totalEmployees: sizeOf(parsedCsv),
    // One entry per department with head count and salary statistics
    byDepartment: (parsedCsv groupBy ((row) -> row.department)) mapObject ((staff, deptName) -> {
        (deptName): {
            count: sizeOf(staff),
            totalSalary: sum(staff map ((row) -> salaryOf(row))),
            avgSalary: avg(staff map ((row) -> salaryOf(row))),
            employees: staff map ((row) -> {
                id: row.employee_id,
                name: fullName(row),
                salary: salaryOf(row)
            })
        }
    }),
    // Sort by salary descending and keep the first row
    highestPaid: (parsedCsv orderBy ((row) -> -salaryOf(row)))[0] then ((top) -> {
        name: fullName(top),
        salary: salaryOf(top),
        department: top.department
    })
}
Generate CSV Output
%dw 2.0
output application/csv separator=",", header=true, quoteValues=true
// Orders to export; each object becomes one CSV row
var jsonOrders = [
    {orderId: "O001", customer: "Acme", product: "Widget A", qty: 10, price: 25.99},
    {orderId: "O002", customer: "Beta Inc", product: "Widget B", qty: 5, price: 49.99},
    {orderId: "O003", customer: "Acme", product: "Widget C", qty: 20, price: 15.50}
]
---
// The keys of each mapped object become the CSV column headers
jsonOrders map ((order) -> {
    "Order ID": order.orderId,
    "Customer Name": order.customer,
    "Product": order.product,
    "Quantity": order.qty,
    "Unit Price": order.price,
    "Line Total": order.qty * order.price,
    "Order Date": now() as String {format: "yyyy-MM-dd"}
})
Advanced Array Operations
Master complex array manipulations:
%dw 2.0
output application/json
// Quarterly sales figures per region/product pair
var salesData = [
    {region: "North", product: "A", q1: 100, q2: 150, q3: 120, q4: 180},
    {region: "North", product: "B", q1: 80, q2: 90, q3: 110, q4: 95},
    {region: "South", product: "A", q1: 200, q2: 180, q3: 220, q4: 250},
    {region: "South", product: "B", q1: 150, q2: 160, q3: 140, q4: 170},
    {region: "East", product: "A", q1: 90, q2: 100, q3: 95, q4: 110},
    {region: "West", product: "A", q1: 120, q2: 130, q3: 125, q4: 140}
]
var regions = salesData.region distinctBy ((name) -> name)
var products = salesData.product distinctBy ((name) -> name)
// Sum of the four quarters of a single record
fun annualTotal(record) = record.q1 + record.q2 + record.q3 + record.q4
// Per-quarter sums across a list of records
fun quarterTotals(records) = {
    Q1: sum(records.q1),
    Q2: sum(records.q2),
    Q3: sum(records.q3),
    Q4: sum(records.q4)
}
// One flattened row for a given quarter of a record
fun quarterRow(record, label, amount) = {
    region: record.region,
    product: record.product,
    quarter: label,
    sales: amount
}
---
{
    // Flatten quarterly data to rows: one row per record per quarter
    flattenedData: salesData flatMap ((record) -> [
        quarterRow(record, "Q1", record.q1),
        quarterRow(record, "Q2", record.q2),
        quarterRow(record, "Q3", record.q3),
        quarterRow(record, "Q4", record.q4)
    ]),
    // Pivot: region totals by quarter, plus the annual total
    regionQuarterPivot: (salesData groupBy ((record) -> record.region)) mapObject ((records, region) -> {
        (region): quarterTotals(records) ++ {
            Annual: sum(records map ((record) -> annualTotal(record)))
        }
    }),
    // Product performance ranking, highest annual sales first
    productRanking: (
        ((salesData groupBy ((record) -> record.product)) mapObject ((records, product) -> {
            (product): sum(records map ((record) -> annualTotal(record)))
        })) pluck ((total, product) -> {product: product, totalSales: total})
    ) orderBy ((entry) -> -entry.totalSales),
    // Name of the quarter with the largest regional total
    bestQuarterByRegion: (salesData groupBy ((record) -> record.region)) mapObject ((records, region) -> {
        (region): do {
            var totals = quarterTotals(records)
            var maxVal = max(totals pluck ((amount) -> amount))
            ---
            ((totals filterObject ((amount, quarter) -> amount == maxVal)) pluck ((amount, quarter) -> quarter))[0]
        }
    }),
    // Distinct regions and products
    distinctRegions: regions,
    distinctProducts: products,
    // Cross join for all region/product combinations
    allCombinations: regions flatMap ((region) ->
        products map ((product) -> {
            region: region,
            product: product
        })
    )
}
Dynamic Key Handling
Work with dynamic and variable keys:
%dw 2.0
output application/json
// Object whose keys follow a "<prefix>_<suffix>" naming scheme
var dynamicData = {
    "field_001": "value1",
    "field_002": "value2",
    "custom_abc": "valueA",
    "custom_xyz": "valueB",
    "metadata_created": "2025-01-15",
    "metadata_updated": "2025-01-20"
}
// Old key -> new key; keys not listed here are kept unchanged
var keyMapping = {
    "field_001": "primaryField",
    "field_002": "secondaryField"
}
// Splits a key such as "custom_abc" on "_" into its segments
fun keyParts(key) = (key as String) splitBy "_"
---
{
    // Rename keys dynamically
    renamedFields: dynamicData mapObject ((value, key) -> {
        ((keyMapping[key as String] default key)): value
    }),
    // Group by key prefix. The index must be applied to the splitBy result
    // inside the lambda; applying it to the parenthesised lambda itself is an error.
    groupedByPrefix: dynamicData
        pluck ((value, key) -> {key: key, value: value})
        groupBy ((item) -> keyParts(item.key)[0])
        mapObject ((items, prefix) -> {
            (prefix): items reduce ((item, acc = {}) -> acc ++ {
                // Everything after the prefix, re-joined, becomes the nested key
                ((keyParts(item.key)[1 to -1]) joinBy "_"): item.value
            })
        }),
    // Filter keys by pattern
    customFieldsOnly: dynamicData filterObject ((v, k) ->
        (k as String) startsWith "custom_"
    ),
    // Dynamic key construction
    constructedObject: ["alpha", "beta", "gamma"] reduce ((item, acc = {}) ->
        acc ++ {("dynamic_" ++ item): upper(item)}
    )
}
Error Handling and Validation
Robust error handling patterns:
%dw 2.0
output application/json
// try/orElse live in dw::Runtime, which is not imported automatically;
// safeToDate below depends on them.
import try, orElse from dw::Runtime
// Sample payload mixing one valid order with three differently broken ones
var inputPayload = {
    "orders": [
        {"id": "O1", "amount": "100.50", "currency": "USD", "date": "2025-01-15"},
        {"id": "O2", "amount": "invalid", "currency": "EUR", "date": "2025-01-16"},
        {"id": "O3", "amount": "200.00", "currency": null, "date": "bad-date"},
        {"id": null, "amount": "150.00", "currency": "GBP", "date": "2025-01-17"}
    ]
}
// Validates one order.
// Returns {isValid: Boolean, errors: Array<String>}; errors lists every failed
// check in the order id, amount, currency, date.
fun validateOrder(order) = do {
    var amountOk = (order.amount is String) and (order.amount matches /^\d+(\.\d{2})?$/)
    var dateOk = (order.date is String) and (order.date matches /^\d{4}-\d{2}-\d{2}$/)
    var problems =
        (if (order.id == null) ["Missing order ID"] else []) ++
        (if (!amountOk) ["Invalid amount format"] else []) ++
        (if (order.currency == null) ["Missing currency"] else []) ++
        (if (!dateOk) ["Invalid date format"] else [])
    ---
    {
        // The order is valid exactly when no check produced an error
        isValid: isEmpty(problems),
        errors: problems
    }
}
// Safe type conversion: returns val as a Number when it is a String holding an
// optionally signed integer or decimal; returns null for anything else.
fun safeToNumber(val) = do {
    var looksNumeric = (val is String) and (val matches /^-?\d+(\.\d+)?$/)
    ---
    if (looksNumeric) val as Number else null
}
// Coerces val to a Date using the given format pattern; returns null when the
// coercion fails instead of raising an error.
fun safeToDate(val, format) =
    orElse(
        try(() -> val as Date {format: format}),
        () -> null
    )
---
do {
    var orders = inputPayload.orders
    // Validate every order once and reuse the result in each section below
    var checked = orders map ((order, index) -> {
        order: order,
        index: index,
        validation: validateOrder(order)
    })
    var passed = checked filter ((entry) -> entry.validation.isValid)
    var failed = checked filter ((entry) -> !entry.validation.isValid)
    ---
    {
        // Validate all orders
        validationResults: checked map ((entry) -> {
            orderIndex: entry.index,
            order: entry.order,
            validation: entry.validation
        }),
        // Separate valid and invalid
        validOrders: passed map ((entry) -> entry.order),
        invalidOrders: failed map ((entry) -> {
            orderIndex: entry.index,
            originalData: entry.order,
            errors: entry.validation.errors
        }),
        // Transform with safe conversions; unusable values fall back to defaults
        processedOrders: orders map ((order) -> {
            id: order.id default "UNKNOWN",
            amount: safeToNumber(order.amount) default 0,
            currency: order.currency default "USD",
            date: safeToDate(order.date, "yyyy-MM-dd"),
            processed: true
        }),
        // Summary
        summary: {
            total: sizeOf(orders),
            valid: sizeOf(passed),
            invalid: sizeOf(failed)
        }
    }
}
Custom Functions and Modules
Create reusable DataWeave modules:
// File: src/main/resources/dw/CommonFunctions.dwl
%dw 2.0
// String utilities
// Upper-cases the first character of str and lower-cases the rest.
// Empty and single-character strings are handled separately because the
// `1 to -1` range has nothing to select for them.
fun capitalize(str: String): String =
    if (sizeOf(str) <= 1)
        upper(str)
    else
        upper(str[0]) ++ lower(str[1 to -1])
// Capitalizes every space-separated word of str and joins the words back
// together with single spaces.
fun toTitleCase(str: String): String =
    ((str splitBy " ") map ((word) -> capitalize(word))) joinBy " "
// Shortens str to at most maxLen characters.
// When text is cut and there is room, the last three characters are "...".
// For maxLen <= 3 there is no room for the ellipsis, so the text is cut
// without it; a non-positive maxLen yields an empty string.
fun truncate(str: String, maxLen: Number): String =
    if (sizeOf(str) <= maxLen)
        str
    else if (maxLen <= 0)
        ""
    else if (maxLen <= 3)
        str[0 to maxLen - 1]
    else
        // keep maxLen - 3 characters so the result is exactly maxLen long
        str[0 to maxLen - 4] ++ "..."
// Date utilities
// Formats a calendar date as an ISO-8601 UTC timestamp at midnight,
// e.g. 2025-01-15 -> "2025-01-15T00:00:00Z".
// A Date has no time-of-day fields, so it is first combined with a midnight
// UTC Time to obtain a DateTime that the HH:mm:ss pattern can format.
fun formatDateISO(date: Date): String =
    (date ++ |00:00:00Z|) as String {format: "yyyy-MM-dd'T'HH:mm:ss'Z'"}
// Number of days from date1 to date2: subtracts date1 from date2 and
// coerces the difference to a Number using days as the unit.
fun daysBetween(date1: Date, date2: Date): Number = do {
    var elapsed = date2 - date1
    ---
    elapsed as Number {unit: "days"}
}
// True when date falls on Monday through Friday.
// Uses the numeric dayOfWeek field (1 = Monday ... 7 = Sunday) rather than a
// formatted day name, so the result does not depend on the runtime locale.
fun isBusinessDay(date: Date): Boolean =
    date.dayOfWeek <= 5
// Currency formatting
// Formats amount with thousands separators and two decimals, prefixed by the
// symbol looked up for currency, or by the currency code itself when no
// symbol is listed.
fun formatCurrency(amount: Number, currency: String): String = do {
// Known currency codes and their symbols; the dollar sign is escaped
var symbols = {
"USD": "\$",
"EUR": "€",
"GBP": "£"
}
---
// NOTE(review): `default` and `++` are both written infix without grouping here,
// so it is unclear whether the " " is appended only to the fallback code or to
// the symbol as well. Confirm the intended output and add explicit parentheses.
(symbols[currency] default currency ++ " ") ++
(amount as String {format: "#,##0.00"})
}
// UUID generation (simplified)
// Returns "<prefix>-<uuid without dashes>".
// The dash removal is parenthesised so it applies to the UUID only; an
// unparenthesised infix chain runs left to right and would also strip the
// dash that separates the prefix from the UUID.
fun generateId(prefix: String): String =
    prefix ++ "-" ++ ((uuid() splitBy "-") joinBy "")
// Null-safe navigation
// Walks a dot-separated path through nested objects, one segment at a time.
// Yields null as soon as the current value is not an Object.
fun safeGet(obj: Object, path: String): Any =
    (path splitBy ".") reduce ((segment, current = obj) ->
        if (!(current is Object))
            null
        else
            current[segment]
    )
Using Custom Modules
%dw 2.0
// Import every function of the custom module (src/main/resources/dw/CommonFunctions.dwl)
import * from dw::CommonFunctions
output application/json
// Sample customer record used to exercise the module functions
var customerData = {
    name: "john doe",
    email: "JOHN.DOE@EXAMPLE.COM",
    description: "This is a very long description that should be truncated for display purposes in the UI",
    orderDate: "2025-01-15" as Date,
    orderTotal: 1234.56
}
var placedOn = customerData.orderDate
---
{
    displayName: toTitleCase(customerData.name),
    email: lower(customerData.email),
    shortDescription: truncate(customerData.description, 50),
    orderDateFormatted: formatDateISO(placedOn),
    daysSinceOrder: daysBetween(placedOn, now()),
    totalFormatted: formatCurrency(customerData.orderTotal, "USD"),
    transactionId: generateId("TXN")
}
Performance Optimization
Optimize DataWeave for large payloads:
%dw 2.0
output application/json
// For large payloads, stream the input (the streaming=true reader property, set on
// the MIME type of the source) and let the writer emit output lazily:
// output application/json deferred=true
// Synthetic dataset: 10,000 records with a random value and a cycling category
var largeDataset = (1 to 10000) as Array map {
    id: $,
    value: random() * 1000,
    category: ["A", "B", "C", "D"][mod($, 4)]
}
---
{
    // GOOD: Use reduce instead of multiple iterations
    optimizedAggregation: largeDataset reduce ((item, acc = {
        count: 0,
        sum: 0,
        categories: {}
    }) -> {
        count: acc.count + 1,
        sum: acc.sum + item.value,
        // `++` appends to an object and does not overwrite an existing key, so the
        // old counter is removed first; otherwise every item would add a duplicate
        // key and the lookup would keep reading the first, stale value.
        categories: (acc.categories - item.category) ++ {
            (item.category): (acc.categories[item.category] default 0) + 1
        }
    }),
    // GOOD: Filter early, transform later
    filteredFirst: largeDataset
        filter ($.value > 500) // Reduce dataset size first
        map { // Then transform
            id: $.id,
            roundedValue: round($.value)
        },
    // GOOD: Use distinctBy with specific field
    uniqueCategories: largeDataset.category distinctBy $,
    // Limit output for previews
    preview: largeDataset[0 to 9]
}
/*
Performance Tips:
1. Use streaming for files > 10MB
2. Filter before mapping
3. Avoid nested loops - use groupBy and reduce
4. Combine streaming with deferred output (writer property deferred=true)
5. Minimize type coercions in loops
*/
Conclusion
DataWeave's functional approach makes complex transformations elegant and maintainable. Master type coercion, array operations, and custom functions to handle any integration scenario. Use modules for reusability and follow performance best practices for production workloads. With these techniques, you can tackle any MuleSoft transformation challenge.