/*
 * Decompiled with CFR 0.152.
 */
package org.apache.sysds.runtime.transform.tokenize.applier;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.sysds.common.Types;
import org.apache.sysds.runtime.frame.data.FrameBlock;
import org.apache.sysds.runtime.transform.tokenize.DocumentRepresentation;
import org.apache.sysds.runtime.transform.tokenize.applier.TokenizerApplier;
import org.apache.sysds.runtime.util.UtilFunctions;
import org.apache.wink.json4j.JSONException;
import org.apache.wink.json4j.JSONObject;

/**
 * Tokenizer applier that replaces every token by a hash bucket and emits, per document,
 * how many tokens fell into each bucket.
 *
 * <p>A token's bucket is {@code hashCode() mod num_features}, normalized to be non-negative.
 * The per-document bucket counts are kept in {@link #hashes}, sorted by bucket id.
 *
 * <p>Output layouts produced by this class:
 * <ul>
 *   <li><b>long format</b>: one row per (document, bucket) pair with the columns
 *       {@code [id columns..., bucket id + 1, count]}, limited to {@code maxTokens} rows
 *       per document;</li>
 *   <li><b>wide format</b>: one row per document with the columns
 *       {@code [id columns..., count of bucket 0, ..., count of bucket maxTokens - 1]},
 *       where buckets that did not occur are written as {@code 0}.</li>
 * </ul>
 */
public class TokenizerApplierHash extends TokenizerApplier {
    private static final long serialVersionUID = 4763889041868044668L;

    /** Number of hash buckets; defaults to 2^20 and may be overridden via the {@code num_features} parameter. */
    public int num_features = 0x100000;

    /** Per-document bucket counts (bucket id to number of tokens), indexed by document position. */
    private List<Map<Integer, Long>> hashes;

    /**
     * Creates a hash tokenizer applier.
     *
     * @param numIdCols    number of identifier columns, forwarded to the base class
     * @param maxTokens    maximum number of tokens per document, forwarded to the base class
     * @param wideFormat   whether the wide output layout is used, forwarded to the base class
     * @param applyPadding whether padding is requested, forwarded to the base class
     * @param params       optional JSON parameters; {@code num_features} overrides the bucket count
     * @throws JSONException            if {@code num_features} cannot be read as an int
     * @throws IllegalArgumentException if the configured {@code num_features} is not positive
     */
    public TokenizerApplierHash(int numIdCols, int maxTokens, boolean wideFormat, boolean applyPadding, JSONObject params) throws JSONException {
        super(numIdCols, maxTokens, wideFormat, applyPadding);
        if (!applyPadding && wideFormat) {
            LOG.warn("ApplyPadding was set to 'false', Hash Tokenizer with wide format always has padding applied");
        }
        if (params != null && params.has("num_features")) {
            int configured = params.getInt("num_features");
            // A zero divisor would fail later with an ArithmeticException while hashing and a
            // negative one would yield negative bucket ids, so reject both up front.
            if (configured <= 0) {
                throw new IllegalArgumentException("num_features must be positive but was " + configured);
            }
            this.num_features = configured;
        }
    }

    /**
     * Computes the number of output rows for the given documents.
     *
     * <p>Wide format yields one row per document. Long format with padding yields exactly
     * {@code maxTokens} rows per document. Long format without padding yields one row per
     * distinct bucket of each document, capped at {@code maxTokens}; this case reads
     * {@link #hashes} and therefore requires {@link #build} to have populated every document.
     *
     * @param internalRepresentation the tokenized documents
     * @return the number of rows the output frame needs
     */
    @Override
    public int getNumRows(DocumentRepresentation[] internalRepresentation) {
        if (this.wideFormat) {
            return internalRepresentation.length;
        }
        if (this.applyPadding) {
            return this.maxTokens * internalRepresentation.length;
        }
        return this.hashes.stream().mapToInt(hashMap -> Math.min(hashMap.size(), this.maxTokens)).sum();
    }

    /**
     * Allocates the per-document metadata with one (initially {@code null}) slot per document.
     *
     * @param numDocuments number of documents that will be processed
     */
    @Override
    public void allocateInternalMeta(int numDocuments) {
        // Pre-filled with nulls so that build() can set entries by index.
        this.hashes = new ArrayList<>(Collections.nCopies(numDocuments, null));
    }

    /**
     * Computes the sorted bucket counts for the documents in the block starting at
     * {@code inputRowStart} and stores them in {@link #hashes}.
     *
     * @param internalRepresentation the tokenized documents
     * @param inputRowStart          index of the first document to process
     * @param blk                    block size passed to {@code UtilFunctions.getEndIndex}
     */
    @Override
    public void build(DocumentRepresentation[] internalRepresentation, int inputRowStart, int blk) {
        final int endIndex = UtilFunctions.getEndIndex(internalRepresentation.length, inputRowStart, blk);
        final int numFeatures = this.num_features;
        for (int i = inputRowStart; i < endIndex; ++i) {
            // floorMod keeps the bucket id in [0, numFeatures) even for negative hash codes;
            // the TreeMap keeps the buckets sorted by id.
            Map<Integer, Long> hashCounts = internalRepresentation[i].tokens.stream()
                .collect(Collectors.groupingBy(
                    token -> Math.floorMod(token.hashCode(), numFeatures),
                    TreeMap::new,
                    Collectors.counting()));
            this.hashes.set(i, hashCounts);
        }
    }

    /**
     * Writes the bucket counts of the documents in the block starting at {@code inputRowStart}
     * into the output frame, using the wide or long layout depending on {@code wideFormat}.
     *
     * @param internalRepresentation the tokenized documents (only their keys are read here)
     * @param out                    the output frame to write into
     * @param inputRowStart          index of the first document to process
     * @param blk                    block size passed to {@code UtilFunctions.getEndIndex}
     * @return the output row index following the last written row
     */
    @Override
    public int applyInternalRepresentation(DocumentRepresentation[] internalRepresentation, FrameBlock out, int inputRowStart, int blk) {
        int endIndex = UtilFunctions.getEndIndex(internalRepresentation.length, inputRowStart, blk);
        // The first output row is determined by the inherited helper from the stored metadata.
        int outputRow = this.getOutputRow(inputRowStart, this.hashes);
        for (int i = inputRowStart; i < endIndex; ++i) {
            List<Object> keys = internalRepresentation[i].keys;
            Map<Integer, Long> sortedHashes = this.hashes.get(i);
            outputRow = this.wideFormat
                ? this.setTokensWide(outputRow, keys, sortedHashes, out)
                : this.setTokensLong(outputRow, keys, sortedHashes, out);
        }
        return outputRow;
    }

    /**
     * Writes one document in long format: one row per bucket, in ascending bucket order,
     * for at most {@code maxTokens} buckets.
     *
     * @param row          first output row to write
     * @param keys         identifier values of the document
     * @param sortedHashes bucket counts of the document, iterated in map order
     * @param out          the output frame to write into
     * @return the output row index following the last written (or padded) row
     */
    private int setTokensLong(int row, List<Object> keys, Map<Integer, Long> sortedHashes, FrameBlock out) {
        int numTokens = 0;
        for (Map.Entry<Integer, Long> hashCount : sortedHashes.entrySet()) {
            if (numTokens >= this.maxTokens) {
                break;
            }
            // setKeys writes the identifier columns; its return value is used as the first token column.
            int col = this.setKeys(row, keys, out);
            // Bucket ids are emitted shifted by one.
            int hash = hashCount.getKey() + 1;
            long count = hashCount.getValue();
            out.set(row, col, hash);
            out.set(row, col + 1, count);
            ++numTokens;
            ++row;
        }
        if (this.applyPadding) {
            row = this.applyPaddingLong(row, numTokens, keys, out, "", 0L);
        }
        return row;
    }

    /**
     * Writes one document in wide format: a single row holding, for every bucket id in
     * {@code [0, maxTokens)}, the count of that bucket or {@code 0} if it did not occur.
     *
     * @param row          output row to write
     * @param keys         identifier values of the document
     * @param sortedHashes bucket counts of the document
     * @param out          the output frame to write into
     * @return {@code row + 1}
     */
    private int setTokensWide(int row, List<Object> keys, Map<Integer, Long> sortedHashes, FrameBlock out) {
        int numKeys = this.setKeys(row, keys, out);
        for (int tokenPos = 0; tokenPos < this.maxTokens; ++tokenPos) {
            long positionHash = sortedHashes.getOrDefault(tokenPos, 0L);
            out.set(row, numKeys + tokenPos, positionHash);
        }
        return row + 1;
    }

    /**
     * Returns the output schema matching the configured layout.
     *
     * @return the column value types of the output frame
     */
    @Override
    public Types.ValueType[] getOutSchema() {
        if (this.wideFormat) {
            return TokenizerApplierHash.getOutSchemaWide(this.numIdCols, this.maxTokens);
        }
        return TokenizerApplierHash.getOutSchemaLong(this.numIdCols);
    }

    /**
     * Builds the wide schema: {@code numIdCols} STRING columns followed by
     * {@code maxTokens} INT64 columns.
     */
    private static Types.ValueType[] getOutSchemaWide(int numIdCols, int maxTokens) {
        Types.ValueType[] schema = new Types.ValueType[numIdCols + maxTokens];
        for (int i = 0; i < numIdCols; ++i) {
            schema[i] = Types.ValueType.STRING;
        }
        for (int i = numIdCols; i < schema.length; ++i) {
            schema[i] = Types.ValueType.INT64;
        }
        return schema;
    }

    /**
     * Builds the long schema: {@code numIdCols} STRING columns followed by two INT64
     * columns (bucket id and count).
     */
    private static Types.ValueType[] getOutSchemaLong(int numIdCols) {
        Types.ValueType[] schema = UtilFunctions.nCopies(numIdCols + 2, Types.ValueType.STRING);
        schema[numIdCols] = Types.ValueType.INT64;
        schema[numIdCols + 1] = Types.ValueType.INT64;
        return schema;
    }
}

