Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix array type detection on fill-array-data DEX instructions - fixes #1806 #2084

Merged
merged 3 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/main/java/soot/dexpler/DexBody.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicReference;

import org.jf.dexlib2.analysis.ClassPath;
import org.jf.dexlib2.analysis.ClassPathResolver;
Expand Down Expand Up @@ -771,6 +770,7 @@ public Body jimplify(Body b, SootMethod m) {
DeadAssignmentEliminator.v().transform(jBody);
UnconditionalBranchFolder.v().transform(jBody);
}
DexFillArrayDataTransformer.v().transform(jBody);

TypeAssigner.v().transform(jBody);

Expand Down
162 changes: 162 additions & 0 deletions src/main/java/soot/dexpler/DexFillArrayDataTransformer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package soot.dexpler;

/*-
* #%L
* Soot - a J*va Optimization Framework
* %%
* Copyright (C) 2012 Michael Markert, Frank Hartmann
*
* (c) 2012 University of Luxembourg - Interdisciplinary Centre for
* Security Reliability and Trust (SnT) - All rights reserved
* Alexandre Bartel
*
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 2.1 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Lesser Public License for more details.
*
* You should have received a copy of the GNU General Lesser Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/lgpl-2.1.html>.
* #L%
*/

import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import soot.ArrayType;
import soot.Body;
import soot.BodyTransformer;
import soot.G;
import soot.Local;
import soot.Type;
import soot.Unit;
import soot.Value;
import soot.dexpler.instructions.FillArrayDataInstruction;
import soot.dexpler.typing.UntypedConstant;
import soot.jimple.ArrayRef;
import soot.jimple.AssignStmt;
import soot.jimple.InvokeExpr;
import soot.jimple.NewArrayExpr;
import soot.toolkits.graph.ExceptionalUnitGraph;
import soot.toolkits.graph.ExceptionalUnitGraphFactory;
import soot.toolkits.scalar.LocalDefs;

/**
* If Dalvik bytecode can contain <code>fill-array-data</code> instructions that can fill an array with data elements we only
* know the element size of.
*
* Therefore when processing such instructions in {@link FillArrayDataInstruction} we don't know the exact type of the data
* that is loaded. Because of (conditional) branches in the code, identifying the type is not always possible at that stage.
* Instead {@link UntypedConstant} constants are used. These constants are processed by this transformer and get their final
* type.
*
*
* @author Jan Peter Stotz
*
*/
public class DexFillArrayDataTransformer extends BodyTransformer {
private static final Logger logger = LoggerFactory.getLogger(DexFillArrayDataTransformer.class);

public static DexFillArrayDataTransformer v() {
return new DexFillArrayDataTransformer();
}

protected void internalTransform(final Body body, String phaseName, Map<String, String> options) {
final ExceptionalUnitGraph g = ExceptionalUnitGraphFactory.createExceptionalUnitGraph(body, DalvikThrowAnalysis.v());
final LocalDefs defs = G.v().soot_toolkits_scalar_LocalDefsFactory().newLocalDefs(g);

for (Iterator<Unit> unitIt = body.getUnits().snapshotIterator(); unitIt.hasNext();) {
Unit u = unitIt.next();
if (!(u instanceof AssignStmt)) {
continue;
}
AssignStmt ass = (AssignStmt) u;
Value rightOp = ass.getRightOp();
if (rightOp instanceof UntypedConstant) {
Value left = ass.getLeftOp();
if (left instanceof ArrayRef) {
ArrayRef leftArray = (ArrayRef) left;

Local l = (Local) leftArray.getBase();
List<Type> arrayTypes = new LinkedList<>();
checkArrayDefinitions(l, ass, defs, arrayTypes);
if (arrayTypes.isEmpty()) {
throw new InternalError("Failed to determine the array type ");
}
if (arrayTypes.size() > 1) {
arrayTypes = arrayTypes.stream().distinct().collect(Collectors.toList());
if (arrayTypes.size() > 1) {
logger.warn("Found multiple possible array types, using first ignoreing the others: {}", arrayTypes);
StevenArzt marked this conversation as resolved.
Show resolved Hide resolved
}
}

// We found the array type, now convert the untyped constant value to it's final type
Type elementType = arrayTypes.get(0);
Value constant = ass.getRightOp();
UntypedConstant untyped = (UntypedConstant) constant;
ass.setRightOp(untyped.defineType(elementType));
}
}
}
}

/**
* Check the all available definitions of the current array to detect the array type and thus the type of the data loaded
* by the array-fill-data instruction.
*
* @param l
* local the array we are interested in is saved in
* @param u
* unit we start our search
* @param defs
* @param arrayTypes
* result list containing the discovered array type(s)
*/
private void checkArrayDefinitions(Local l, Unit u, LocalDefs defs, List<Type> arrayTypes) {
List<Unit> assDefs = defs.getDefsOfAt(l, u);
for (Unit d : assDefs) {
if (d instanceof AssignStmt) {
AssignStmt arrayAssign = (AssignStmt) d;
Value source = arrayAssign.getRightOp();
if (source instanceof NewArrayExpr) {
// array is assigned from a newly created array
NewArrayExpr newArray = (NewArrayExpr) source;
arrayTypes.add(newArray.getBaseType());
continue;
}
if (source instanceof InvokeExpr) {
jpstotz marked this conversation as resolved.
Show resolved Hide resolved
// array is assigned from the return value of a function
InvokeExpr invExpr = (InvokeExpr) source;
Type aType = invExpr.getMethodRef().getReturnType();
if (!(aType instanceof ArrayType)) {
throw new InternalError("Failed to identify the array type. The identified method invocation "
+ "does not return an array type. Invocation: " + invExpr.getMethodRef());
}
arrayTypes.add(((ArrayType) aType).getArrayElementType());
continue;
}
if (source instanceof Local) {
// our array is defined by an assignment from another array => check the definition of that other array.
Local newLocal = (Local) source; // local of the "other array"
checkArrayDefinitions(newLocal, d, defs, arrayTypes);
jpstotz marked this conversation as resolved.
Show resolved Hide resolved
continue;
}
throw new InternalError("Unsupported array definition statement: " + d);
}
}

}
}
119 changes: 26 additions & 93 deletions src/main/java/soot/dexpler/instructions/FillArrayDataInstruction.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,41 +27,35 @@
* #L%
*/

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.jf.dexlib2.iface.instruction.Instruction;
import org.jf.dexlib2.iface.instruction.formats.ArrayPayload;
import org.jf.dexlib2.iface.instruction.formats.Instruction22c;
import org.jf.dexlib2.iface.instruction.formats.Instruction31t;
import org.jf.dexlib2.iface.reference.TypeReference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import soot.ArrayType;
import soot.BooleanType;
import soot.ByteType;
import soot.CharType;
import soot.DoubleType;
import soot.FloatType;
import soot.IntType;
import soot.Local;
import soot.LongType;
import soot.ShortType;
import soot.Type;
import soot.dexpler.DexBody;
import soot.dexpler.DexType;
import soot.dexpler.DexFillArrayDataTransformer;
import soot.dexpler.typing.UntypedConstant;
import soot.dexpler.typing.UntypedIntOrFloatConstant;
import soot.dexpler.typing.UntypedLongOrDoubleConstant;
import soot.jimple.ArrayRef;
import soot.jimple.AssignStmt;
import soot.jimple.DoubleConstant;
import soot.jimple.FloatConstant;
import soot.jimple.Constant;
import soot.jimple.IntConstant;
import soot.jimple.Jimple;
import soot.jimple.LongConstant;
import soot.jimple.NumericConstant;
import soot.jimple.Stmt;

/**
* Converts <code>fill-array-data</code> instructions and associated data blocks into a series of assignment instructions
* (one for each array index the data block contains a value).
*
* As the data block contains untyped data, only the number of bytes per element is known. Recovering the array type at the
* stage this class is used on would require a detailed analysis on the dex code. Therefore we save the data elements as
* {@link UntypedConstant} and later use {@link DexFillArrayDataTransformer} to convert the values to their final type.
*/
public class FillArrayDataInstruction extends PseudoInstruction {
private static final Logger logger = LoggerFactory.getLogger(FillArrayDataInstruction.class);

Expand Down Expand Up @@ -95,13 +89,11 @@ public void jimplify(DexBody body) {
List<Number> elements = arrayTable.getArrayElements();
int numElements = elements.size();

int elementsWidth = arrayTable.getElementWidth();
Stmt firstAssign = null;
for (int i = 0; i < numElements; i++) {
ArrayRef arrayRef = Jimple.v().newArrayRef(arrayReference, IntConstant.v(i));
NumericConstant element = getArrayElement(elements.get(i), body, destRegister);
if (element == null) {
break;
}
Constant element = getArrayElement(elements.get(i), elementsWidth);
AssignStmt assign = Jimple.v().newAssignStmt(arrayRef, element);
addTags(assign);
body.add(assign);
Expand All @@ -110,6 +102,8 @@ public void jimplify(DexBody body) {
}
}
if (firstAssign == null) { // if numElements == 0. Is it possible?
logger.warn("No assign statements created for array at address 0x{} - empty array data section?",
Integer.toHexString(targetAddress));
firstAssign = Jimple.v().newNopStmt();
body.add(firstAssign);
}
Expand All @@ -122,80 +116,19 @@ public void jimplify(DexBody body) {

}

private NumericConstant getArrayElement(Number element, DexBody body, int arrayRegister) {

List<DexlibAbstractInstruction> instructions = body.instructionsBefore(this);
Set<Integer> usedRegisters = new HashSet<Integer>();
usedRegisters.add(arrayRegister);

Type elementType = null;
Outer: for (DexlibAbstractInstruction i : instructions) {
if (usedRegisters.isEmpty()) {
break;
}

for (int reg : usedRegisters) {
if (i instanceof NewArrayInstruction) {
NewArrayInstruction newArrayInstruction = (NewArrayInstruction) i;
Instruction22c instruction22c = (Instruction22c) newArrayInstruction.instruction;
if (instruction22c.getRegisterA() == reg) {
ArrayType arrayType = (ArrayType) DexType.toSoot((TypeReference) instruction22c.getReference());
elementType = arrayType.getElementType();
break Outer;
}
}
}

// // look for obsolete registers
// for (int reg : usedRegisters) {
// if (i.overridesRegister(reg)) {
// usedRegisters.remove(reg);
// break; // there can't be more than one obsolete
// }
// }

// look for new registers
for (int reg : usedRegisters) {
int newRegister = i.movesToRegister(reg);
if (newRegister != -1) {
usedRegisters.add(newRegister);
usedRegisters.remove(reg);
break; // there can't be more than one new
}
}
private Constant getArrayElement(Number element, int elementsWidth) {
if (elementsWidth == 2) {
// For size = 2 the only possible array type is short[]
return IntConstant.v(element.shortValue());
}

if (elementType == null) {
// throw new InternalError("Unable to find array type to type array elements!");
logger.warn("Unable to find array type to type array elements! Array was not defined! (obfuscated bytecode?)");
return null;
}

NumericConstant value;

if (elementType instanceof BooleanType) {
value = IntConstant.v(element.intValue());
IntConstant ic = (IntConstant) value;
if (ic.value != 0) {
value = IntConstant.v(1);
}
} else if (elementType instanceof ByteType) {
value = IntConstant.v(element.byteValue());
} else if (elementType instanceof CharType || elementType instanceof ShortType) {
value = IntConstant.v(element.shortValue());
} else if (elementType instanceof DoubleType) {
value = DoubleConstant.v(Double.longBitsToDouble(element.longValue()));
} else if (elementType instanceof FloatType) {
value = FloatConstant.v(Float.intBitsToFloat(element.intValue()));
} else if (elementType instanceof IntType) {
value = IntConstant.v(element.intValue());
} else if (elementType instanceof LongType) {
value = LongConstant.v(element.longValue());
} else {
throw new RuntimeException("Invalid Array Type occured in FillArrayDataInstruction: " + elementType);
if (elementsWidth <= 4) {
// can be array of int, char, boolean, float
return UntypedIntOrFloatConstant.v(element.intValue());
}
return value;

// can be array of long or double
return UntypedLongOrDoubleConstant.v(element.longValue());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,22 @@ public IntConstant toIntConstant() {
return IntConstant.v(value);
}

/**
 * Converts the stored value to the integer representation of a boolean.
 *
 * @return the constant <code>1</code> for every non-zero value, otherwise the constant of the stored (zero) value
 */
public IntConstant toBooleanConstant() {
  return (value == 0) ? IntConstant.v(value) : IntConstant.v(1);
}

@Override
public Value defineType(Type t) {
if (t instanceof FloatType) {
return this.toFloatConstant();
} else if (t instanceof IntType || t instanceof CharType || t instanceof BooleanType || t instanceof ByteType
|| t instanceof ShortType) {
return this.toIntConstant();
} else if (t instanceof BooleanType) {
return toBooleanConstant();
} else {
if (value == 0 && t instanceof RefLikeType) {
return NullConstant.v();
Expand Down
Loading