/*
 * Decompiled with CFR 0.152.
 */
package ws.palladian.nodes.extraction.corpus;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Map;
import java.util.function.Consumer;
import org.apache.commons.lang3.Functions;
import org.knime.core.data.DataCell;
import org.knime.core.data.DataColumnSpec;
import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.DataRow;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.DataTableSpecCreator;
import org.knime.core.data.RowKey;
import org.knime.core.data.StringValue;
import org.knime.core.data.collection.CollectionDataValue;
import org.knime.core.data.def.DefaultRow;
import org.knime.core.data.def.IntCell;
import org.knime.core.data.def.StringCell;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionContext;
import org.knime.core.node.ExecutionMonitor;
import org.knime.core.node.InvalidSettingsException;
import org.knime.core.node.NodeModel;
import org.knime.core.node.NodeSettingsRO;
import org.knime.core.node.NodeSettingsWO;
import org.knime.core.node.port.PortObjectSpec;
import org.knime.core.node.streamable.InputPortRole;
import org.knime.core.node.streamable.OutputPortRole;
import org.knime.core.node.streamable.PartitionInfo;
import org.knime.core.node.streamable.PortInput;
import org.knime.core.node.streamable.PortOutput;
import org.knime.core.node.streamable.RowInput;
import org.knime.core.node.streamable.RowOutput;
import org.knime.core.node.streamable.StreamableOperator;
import ws.palladian.helper.collection.Bag;
import ws.palladian.nodes.PalladianPluginActivator;
import ws.palladian.nodes.extraction.corpus.CorpusCreatorNodeSettings;
import ws.palladian.nodes.helper.PalladianKnimeHelper;

public class CorpusCreatorNodeModel
extends NodeModel {
    public static final String NUM_DOCS_ENTRY = "#DOCS";
    private final CorpusCreatorNodeSettings nodeSettings = new CorpusCreatorNodeSettings();

    protected CorpusCreatorNodeModel() {
        super(1, 1);
    }

    protected BufferedDataTable[] execute(BufferedDataTable[] inData, ExecutionContext exec) throws Exception {
        PalladianPluginActivator.checkLicense();
        CorpusData corpusData = new CorpusData();
        int textCellIdx = inData[0].getSpec().findColumnIndex(this.nodeSettings.collectionColumn.getStringValue());
        ExecutionMonitor collectProgress = exec.createSubProgress(0.5);
        for (DataRow row2 : inData[0]) {
            corpusData.add(row2.getCell(textCellIdx));
            collectProgress.setProgress((double)((float)corpusData.numDocuments / (float)inData[0].size()), () -> "Read row '" + String.valueOf(row2.getKey()) + "'");
        }
        DataTableSpec spec = CorpusCreatorNodeModel.createOutSpec();
        long outIdx = 0L;
        BufferedDataContainer container = exec.createDataContainer(spec);
        int numTerms = corpusData.termCorpus.unique().size();
        ExecutionMonitor writeProgress = exec.createSubProgress(0.5);
        corpusData.write(row -> {
            container.addRowToTable(row);
            writeProgress.setProgress((double)((float)outIdx / (float)numTerms), () -> "Wrote row " + String.valueOf(row.getKey()));
        });
        container.close();
        return new BufferedDataTable[]{container.getTable()};
    }

    private static DataTableSpec createOutSpec() {
        return new DataTableSpecCreator().addColumns(new DataColumnSpec[]{new DataColumnSpecCreator("term", StringCell.TYPE).createSpec(), new DataColumnSpecCreator("count", IntCell.TYPE).createSpec()}).createSpec();
    }

    protected DataTableSpec[] configure(DataTableSpec[] inSpecs) throws InvalidSettingsException {
        DataTableSpec inSpec = inSpecs[0];
        PalladianKnimeHelper.checkHasColumn(inSpec, this.nodeSettings.collectionColumn.getStringValue(), CollectionDataValue.class);
        return new DataTableSpec[]{CorpusCreatorNodeModel.createOutSpec()};
    }

    protected void loadInternals(File nodeInternDir, ExecutionMonitor exec) throws IOException, CanceledExecutionException {
    }

    protected void saveInternals(File nodeInternDir, ExecutionMonitor exec) throws IOException, CanceledExecutionException {
    }

    protected void saveSettingsTo(NodeSettingsWO settings) {
        this.nodeSettings.saveSettingsTo(settings);
    }

    protected void validateSettings(NodeSettingsRO settings) throws InvalidSettingsException {
        this.nodeSettings.validateSettings(settings);
    }

    protected void loadValidatedSettingsFrom(NodeSettingsRO settings) throws InvalidSettingsException {
        this.nodeSettings.loadValidatedSettingsFrom(settings);
    }

    protected void reset() {
    }

    public InputPortRole[] getInputPortRoles() {
        return new InputPortRole[]{InputPortRole.NONDISTRIBUTED_STREAMABLE};
    }

    public OutputPortRole[] getOutputPortRoles() {
        return new OutputPortRole[]{OutputPortRole.NONDISTRIBUTED};
    }

    public StreamableOperator createStreamableOperator(PartitionInfo partitionInfo, PortObjectSpec[] inSpecs) throws InvalidSettingsException {
        final int textCellIdx = ((DataTableSpec)inSpecs[0]).findColumnIndex(this.nodeSettings.collectionColumn.getStringValue());
        return new StreamableOperator(){

            public void runFinal(PortInput[] inputs, PortOutput[] outputs, ExecutionContext exec) throws Exception {
                DataRow row;
                RowInput rowInput = (RowInput)inputs[0];
                RowOutput rowOutput = (RowOutput)outputs[0];
                CorpusData corpusData = new CorpusData();
                while ((row = rowInput.poll()) != null) {
                    exec.checkCanceled();
                    exec.setProgress("Read row " + String.valueOf(row.getKey()));
                    corpusData.add(row.getCell(textCellIdx));
                }
                rowInput.close();
                corpusData.write(outRow -> Functions.accept(outRow1 -> {
                    exec.checkCanceled();
                    exec.setProgress("Wrote row " + String.valueOf(outRow1.getKey()));
                    rowOutput.push(outRow1);
                }, (Object)outRow));
                rowOutput.close();
            }
        };
    }

    private static final class CorpusData {
        Bag<String> termCorpus = new Bag();
        int numDocuments = 0;

        private CorpusData() {
        }

        void add(DataCell inputCell) {
            if (inputCell.isMissing()) {
                return;
            }
            CollectionDataValue termCell = (CollectionDataValue)inputCell;
            HashSet terms = new HashSet();
            termCell.forEach(c -> {
                boolean bl = terms.add(((StringValue)c).getStringValue());
            });
            this.termCorpus.addAll(terms);
            ++this.numDocuments;
        }

        void write(Consumer<DataRow> rowConsumer) {
            long outIdx = 0L;
            rowConsumer.accept((DataRow)new DefaultRow(RowKey.createRowKey((long)outIdx++), new DataCell[]{new StringCell(CorpusCreatorNodeModel.NUM_DOCS_ENTRY), new IntCell(this.numDocuments)}));
            for (Map.Entry term : this.termCorpus.unique()) {
                rowConsumer.accept((DataRow)new DefaultRow(RowKey.createRowKey((long)outIdx++), new DataCell[]{new StringCell((String)term.getKey()), new IntCell(((Integer)term.getValue()).intValue())}));
            }
        }
    }
}

