/*
 * DAA2RMA6.java Copyright (C) 2020. Daniel H. Huson
 *
 *  (Some files contain contributions from other authors, who are then mentioned separately.)
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package megan.tools;

import jloda.fx.util.ProgramExecutorService;
import jloda.swing.commands.CommandManager;
import jloda.swing.util.ArgsOptions;
import jloda.swing.util.ResourceManager;
import jloda.util.*;
import megan.accessiondb.AccessAccessionMappingDatabase;
import megan.classification.Classification;
import megan.classification.ClassificationManager;
import megan.classification.IdMapper;
import megan.classification.IdParser;
import megan.classification.data.ClassificationCommandHelper;
import megan.core.ContaminantManager;
import megan.core.Document;
import megan.core.SampleAttributeTable;
import megan.daa.io.DAAParser;
import megan.main.Megan6;
import megan.main.MeganProperties;
import megan.parsers.blast.BlastFileFormat;
import megan.rma6.RMA6Connector;
import megan.rma6.RMA6FromBlastCreator;
import megan.util.DAAFileFilter;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.sql.SQLException;
import java.util.*;

import static megan.accessiondb.AccessAccessionMappingDatabase.SQLiteTempStoreDirectoryProgramProperty;
import static megan.accessiondb.AccessAccessionMappingDatabase.SQLiteTempStoreInMemoryProgramProperty;

/**
 * compute an RMA6 file from a DAA file generated by DIAMOND
 * Daniel Huson, 8.2015
 */
public class DAA2RMA6 {
    /**
     * Command-line entry point: computes RMA6 files from DIAMOND DAA files.
     * <p>
     * On success, the total time and peak memory usage are written to stderr and the JVM exits with code 0.
     * Any exception is reported via {@code Basic.caught} and the JVM exits with code 1.
     *
     * @param args the command-line arguments, handed on unchanged to {@code run}
     */
    public static void main(String[] args) {
        try {
            ResourceManager.addResourceRoot(Megan6.class, "megan.resources");
            ProgramProperties.setProgramName("DAA2RMA");
            ProgramProperties.setProgramVersion(megan.main.Version.SHORT_DESCRIPTION);

            PeakMemoryUsageMonitor.start();

            final DAA2RMA6 program = new DAA2RMA6();
            program.run(args);

            final String totalTime = PeakMemoryUsageMonitor.getSecondsSinceStartString();
            System.err.println("Total time:  " + totalTime);
            final String peakMemory = PeakMemoryUsageMonitor.getPeakUsageString();
            System.err.println("Peak memory: " + peakMemory);
            System.exit(0);
        } catch (Exception exception) {
            // top-level boundary: report the problem and signal failure to the calling shell
            Basic.caught(exception);
            System.exit(1);
        }
    }

    /**
     * Parses the command-line options, loads the requested mapping files and then converts each DAA file
     * (or each pair of DAA files, when paired reads are supplied in two separate files) into an RMA6 file.
     * If metadata files are given, the sample attributes are additionally stored in the corresponding RMA6 file.
     *
     * @param args the command-line arguments
     * @throws UsageException         if both --mapDB and explicit mapping files are given, or if two-file paired mode
     *                                is used with an odd number of input files
     * @throws IOException            if an input file is unreadable, empty or not a DAA file, if both min-support options
     *                                are set, or if the numbers of input, output and metadata files do not fit together
     * @throws ClassNotFoundException part of the original signature; propagated from callees
     * @throws CanceledException      propagated from progress reporting and file creation
     * @throws SQLException           propagated from callees that access the mapping database
     */
    private void run(String[] args) throws UsageException, IOException, ClassNotFoundException, CanceledException, SQLException {
        CommandManager.getGlobalCommands().addAll(ClassificationCommandHelper.getGlobalCommands());

        final ArgsOptions options = new ArgsOptions(args, this, "Computes a MEGAN .rma6 file from a DIAMOND .daa file");
        options.setVersion(ProgramProperties.getProgramVersion());
        options.setLicense("Copyright (C) 2020 Daniel H. Huson. This program comes with ABSOLUTELY NO WARRANTY.");
        options.setAuthors("Daniel H. Huson");

        options.comment("Input");
        final String[] daaFiles = options.getOptionMandatory("-i", "in", "Input DAA file", new String[0]);
        final String[] metaDataFiles = options.getOption("-mdf", "metaDataFile", "Files containing metadata to be included in RMA6 files", new String[0]);

        options.comment("Output");
        final String[] outputFilesOption = options.getOptionMandatory("-o", "out", "Output file(s), one for each input file, or a directory", new String[0]);
        boolean useCompression = options.getOption("-c", "useCompression", "Compress reads and matches in RMA file (smaller files, longer to generate)", true);

        options.comment("Reads");
        final boolean pairedReads = options.getOption("-p", "paired", "Reads are paired", false);
        final int pairedReadsSuffixLength = options.getOption("-ps", "pairedSuffixLength", "Length of name suffix used to distinguish between name (i.e. first word in header) of read and its mate (use 0 if read and mate have same name)", 0);
        final boolean pairsInSingleFile = options.getOption("-pof", "pairedReadsInOneFile", "Are paired reads in one file (usually they are in two)", false);
        options.comment("Parameters");
        boolean longReads = options.getOption("-lg", "longReads", "Parse and analyse as long reads", Document.DEFAULT_LONG_READS);

        final int maxMatchesPerRead = options.getOption("-m", "maxMatchesPerRead", "Max matches per read", 100);
        final boolean runClassifications = options.getOption("-class", "classify", "Run classification algorithm", true);
        final float minScore = options.getOption("-ms", "minScore", "Min score", Document.DEFAULT_MINSCORE);
        final float maxExpected = options.getOption("-me", "maxExpected", "Max expected", Document.DEFAULT_MAXEXPECTED);
        final float minPercentIdentity = options.getOption("-mpi", "minPercentIdentity", "Min percent identity", Document.DEFAULT_MIN_PERCENT_IDENTITY);
        final float topPercent = options.getOption("-top", "topPercent", "Top percent", Document.DEFAULT_TOPPERCENT);
        final int minSupport;
        final float minSupportPercent;
        {
            // Only one of the two min-support options may deviate from its default; the other one is then switched off (0)
            final float minSupportPercent0 = options.getOption("-supp", "minSupportPercent", "Min support as percent of assigned reads (0==off)", Document.DEFAULT_MINSUPPORT_PERCENT);
            final int minSupport0 = options.getOption("-sup", "minSupport", "Min support (0==off)", Document.DEFAULT_MINSUPPORT);
            if (minSupportPercent0 != Document.DEFAULT_MINSUPPORT_PERCENT && minSupport0 == Document.DEFAULT_MINSUPPORT) {
                minSupportPercent = minSupportPercent0;
                minSupport = 0;
            } else if (minSupportPercent0 == Document.DEFAULT_MINSUPPORT_PERCENT && minSupport0 != Document.DEFAULT_MINSUPPORT) {
                minSupportPercent = 0;
                minSupport = minSupport0;
            } else if (minSupportPercent0 != Document.DEFAULT_MINSUPPORT_PERCENT) {
                // reaching here with a non-default percent means that both options were changed
                throw new IOException("Please specify a value for either --minSupport or --minSupportPercent, but not for both");
            } else {
                minSupportPercent = minSupportPercent0;
                minSupport = minSupport0;
            }
        }
        final float minPercentReadToCover = options.getOption("-mrc", "minPercentReadCover", "Min percent of read length to be covered by alignments", Document.DEFAULT_MIN_PERCENT_READ_TO_COVER);
        final float minPercentReferenceToCover = options.getOption("-mrefc", "minPercentReferenceCover", "Min percent of reference length to be covered by alignments", Document.DEFAULT_MIN_PERCENT_REFERENCE_TO_COVER);
        final int minReadLength = options.getOption("-mrl", "minReadLength", "Minimum read length", 0);

        final Document.LCAAlgorithm lcaAlgorithm = Document.LCAAlgorithm.valueOfIgnoreCase(options.getOption("-alg", "lcaAlgorithm", "Set the LCA algorithm to use for taxonomic assignment",
                Document.LCAAlgorithm.values(), longReads ? Document.DEFAULT_LCA_ALGORITHM_LONG_READS.toString() : Document.DEFAULT_LCA_ALGORITHM_SHORT_READS.toString()));
        final float lcaCoveragePercent = options.getOption("-lcp", "lcaCoveragePercent", "Set the percent for the LCA to cover",
                lcaAlgorithm == Document.LCAAlgorithm.longReads ? Document.DEFAULT_LCA_COVERAGE_PERCENT_LONG_READS : (lcaAlgorithm == Document.LCAAlgorithm.weighted ? Document.DEFAULT_LCA_COVERAGE_PERCENT_WEIGHTED_LCA : Document.DEFAULT_LCA_COVERAGE_PERCENT_SHORT_READS));

        // When only printing help, show both possible defaults instead of the one for the current mode
        final String readAssignmentModeDefaultValue;
        if (options.isDoHelp()) {
            readAssignmentModeDefaultValue = (Document.DEFAULT_READ_ASSIGNMENT_MODE_LONG_READS.toString() + " in long read mode, " + Document.DEFAULT_READ_ASSIGNMENT_MODE_SHORT_READS.toString() + " else");
        } else if (longReads)
            readAssignmentModeDefaultValue = Document.DEFAULT_READ_ASSIGNMENT_MODE_LONG_READS.toString();
        else
            readAssignmentModeDefaultValue = Document.DEFAULT_READ_ASSIGNMENT_MODE_SHORT_READS.toString();
        final Document.ReadAssignmentMode readAssignmentMode = Document.ReadAssignmentMode.valueOfIgnoreCase(options.getOption("-ram", "readAssignmentMode", "Set the read assignment mode", readAssignmentModeDefaultValue));

        final String contaminantsFile = options.getOption("-cf", "conFile", "File of contaminant taxa (one Id or name per line)", "");

        options.comment("Classification support:");

        final String mapDBFile = options.getOption("-mdb", "mapDB", "MEGAN mapping db (file megan-map.db)", "");
        final Set<String> selectedClassifications = new HashSet<>(Arrays.asList(options.getOption("-on", "only", "Use only named classifications (if not set: use all)", new String[0])));

        options.comment("Deprecated classification support:");

        final boolean parseTaxonNames = options.getOption("-tn", "parseTaxonNames", "Parse taxon names", true);
        final String acc2TaxaFile = options.getOption("-a2t", "acc2taxa", "Accession-to-Taxonomy mapping file", "");
        final String synonyms2TaxaFile = options.getOption("-s2t", "syn2taxa", "Synonyms-to-Taxonomy mapping file", "");

        {
            final String tags = options.getOption("-t4t", "tags4taxonomy", "Tags for taxonomy id parsing (must set to activate id parsing)", "").trim();
            ProgramProperties.preset("TaxonomyTags", tags);
            ProgramProperties.preset("TaxonomyParseIds", tags.length() > 0);
        }

        final HashMap<String, String> class2AccessionFile = new HashMap<>();
        final HashMap<String, String> class2SynonymsFile = new HashMap<>();

        // One set of mapping-file/tag options is offered per supported (non-taxonomy) classification
        for (String cName : ClassificationManager.getAllSupportedClassificationsExcludingNCBITaxonomy()) {
            class2AccessionFile.put(cName, options.getOption("-a2" + cName.toLowerCase(), "acc2" + cName.toLowerCase(), "Accession-to-" + cName + " mapping file", ""));
            class2SynonymsFile.put(cName, options.getOption("-s2" + cName.toLowerCase(), "syn2" + cName.toLowerCase(), "Synonyms-to-" + cName + " mapping file", ""));
            final String tags = options.getOption("-t4" + cName.toLowerCase(), "tags4" + cName.toLowerCase(), "Tags for " + cName + " id parsing (must set to activate id parsing)", "").trim();
            ProgramProperties.preset(cName + "Tags", tags);
            ProgramProperties.preset(cName + "ParseIds", tags.length() > 0);
        }

        ProgramProperties.preset(IdParser.PROPERTIES_FIRST_WORD_IS_ACCESSION, options.getOption("-fwa", "firstWordIsAccession", "First word in reference header is accession number (set to 'true' for NCBI-nr downloaded Sep 2016 or later)", true));
        ProgramProperties.preset(IdParser.PROPERTIES_ACCESSION_TAGS, options.getOption("-atags", "accessionTags", "List of accession tags", ProgramProperties.get(IdParser.PROPERTIES_ACCESSION_TAGS, IdParser.ACCESSION_TAGS)));

        options.comment(ArgsOptions.OTHER);
        ProgramExecutorService.setNumberOfCoresToUse(options.getOption("-t", "threads", "Number of threads", 8));
        ProgramProperties.put(SQLiteTempStoreInMemoryProgramProperty, options.getOption("-tsm", "tempStoreInMemory", "Temporary storage in memory for SQLITE", false));
        ProgramProperties.put(SQLiteTempStoreDirectoryProgramProperty, options.getOption("-tsd", "tempStoreDir", "Temporary storage directory for SQLITE (if not in-memory)", ""));

        options.done();

        final String propertiesFile;
        if (ProgramProperties.isMacOS())
            propertiesFile = System.getProperty("user.home") + "/Library/Preferences/Megan.def";
        else
            propertiesFile = System.getProperty("user.home") + File.separator + ".Megan.def";
        MeganProperties.initializeProperties(propertiesFile);

        // Validate all input files before any expensive work is started
        for (String fileName : daaFiles) {
            Basic.checkFileReadableNonEmpty(fileName);
            if (!DAAFileFilter.getInstance().accept(fileName))
                throw new IOException("File not in DAA format (or incorrect file suffix?): " + fileName);
        }

        for (String fileName : metaDataFiles) {
            Basic.checkFileReadableNonEmpty(fileName);
        }

        if (Basic.notBlank(contaminantsFile))
            Basic.checkFileReadableNonEmpty(contaminantsFile);

        final Collection<String> mapDBClassifications = AccessAccessionMappingDatabase.getContainedClassificationsIfDBExists(mapDBFile);
        if (mapDBClassifications.size() > 0 && (Basic.hasPositiveLengthValue(class2AccessionFile) || Basic.hasPositiveLengthValue(class2SynonymsFile)))
            throw new UsageException("Illegal to use both --mapDB and --acc2... or --syn2... options");

        if (mapDBClassifications.size() > 0)
            ClassificationManager.setMeganMapDBFile(mapDBFile);

        // A classification is used if it is selected (or nothing is selected) and some mapping source is available for it
        final ArrayList<String> cNames = new ArrayList<>();
        for (String cName : ClassificationManager.getAllSupportedClassificationsExcludingNCBITaxonomy()) {
            if ((selectedClassifications.size() == 0 || selectedClassifications.contains(cName))
                    && (mapDBClassifications.contains(cName) || class2AccessionFile.get(cName).length() > 0 || class2SynonymsFile.get(cName).length() > 0))
                cNames.add(cName);
        }
        if (cNames.size() > 0)
            System.err.println("Functional classifications to use: " + Basic.toString(cNames, ", "));

        final boolean processInPairs = (pairedReads && !pairsInSingleFile);

        // Pair-wise processing consumes the inputs two at a time (daaFiles[i] and daaFiles[i + 1]),
        // so an odd number of files would run past the end of the array further below
        if (processInPairs && daaFiles.length % 2 != 0)
            throw new UsageException("Paired reads in two files: expected an even number of input files, got: " + daaFiles.length);

        final String[] outputFiles = resolveOutputFiles(daaFiles, outputFilesOption, processInPairs);

        if (metaDataFiles.length > 1 && metaDataFiles.length != outputFiles.length) {
            throw new IOException("Number of metadata files (" + metaDataFiles.length + ") doesn't match number of output files (" + outputFiles.length + ")");
        }

        final IdMapper taxonIdMapper = ClassificationManager.get(Classification.Taxonomy, true).getIdMapper();
        final IdMapper[] idMappers = new IdMapper[cNames.size()];

        // Load all mapping files:
        if (runClassifications) {
            ClassificationManager.get(Classification.Taxonomy, true);
            taxonIdMapper.setUseTextParsing(parseTaxonNames);

            if (mapDBFile.length() > 0) {
                taxonIdMapper.loadMappingFile(mapDBFile, IdMapper.MapType.MeganMapDB, false, new ProgressPercentage());
            }
            if (acc2TaxaFile.length() > 0) {
                taxonIdMapper.loadMappingFile(acc2TaxaFile, IdMapper.MapType.Accession, false, new ProgressPercentage());
            }
            if (synonyms2TaxaFile.length() > 0) {
                taxonIdMapper.loadMappingFile(synonyms2TaxaFile, IdMapper.MapType.Synonyms, false, new ProgressPercentage());
            }

            for (int i = 0; i < cNames.size(); i++) {
                final String cName = cNames.get(i);

                idMappers[i] = ClassificationManager.get(cName, true).getIdMapper();

                if (mapDBClassifications.contains(cName))
                    idMappers[i].loadMappingFile(mapDBFile, IdMapper.MapType.MeganMapDB, false, new ProgressPercentage());
                if (class2AccessionFile.get(cName).length() > 0)
                    idMappers[i].loadMappingFile(class2AccessionFile.get(cName), IdMapper.MapType.Accession, false, new ProgressPercentage());
                if (class2SynonymsFile.get(cName).length() > 0)
                    idMappers[i].loadMappingFile(class2SynonymsFile.get(cName), IdMapper.MapType.Synonyms, false, new ProgressPercentage());
            }
        }

        /*
         * process each set of files:
         */
        for (int i = 0; i < daaFiles.length; i++) {
            final int iOutput;
            if (processInPairs) {
                if ((i % 2) == 1)
                    continue; // skip odd numbers: the second file of a pair is handled together with the first
                iOutput = i / 2;
                System.err.println("In DAA files: " + daaFiles[i] + ", " + daaFiles[i + 1]);
                System.err.println("Output file:  " + outputFiles[iOutput]);
            } else {
                iOutput = i;
                System.err.println("In DAA file:  " + daaFiles[i]);
                System.err.println("Output file:  " + outputFiles[i]);
            }

            ProgressListener progressListener = new ProgressPercentage();

            final Document doc = new Document();
            doc.getActiveViewers().add(Classification.Taxonomy);
            doc.getActiveViewers().addAll(cNames);
            doc.setMinScore(minScore);
            doc.setMaxExpected(maxExpected);
            doc.setMinPercentIdentity(minPercentIdentity);
            doc.setLcaAlgorithm(lcaAlgorithm);
            doc.setLcaCoveragePercent(lcaCoveragePercent);
            doc.setTopPercent(topPercent);
            doc.setMinSupportPercent(minSupportPercent);
            doc.setMinSupport(minSupport);
            doc.setPairedReads(pairedReads);
            doc.setPairedReadSuffixLength(pairedReadsSuffixLength);
            doc.setMinReadLength(minReadLength);
            doc.setBlastMode(DAAParser.getBlastMode(daaFiles[i]));
            doc.setLongReads(longReads);
            doc.setMinPercentReadToCover(minPercentReadToCover);
            doc.setMinPercentReferenceToCover(minPercentReferenceToCover);
            doc.setReadAssignmentMode(readAssignmentMode);

            if (contaminantsFile.length() > 0) {
                ContaminantManager contaminantManager = new ContaminantManager();
                contaminantManager.read(contaminantsFile);
                System.err.println(String.format("Contaminants profile: %,d input, %,d total", contaminantManager.inputSize(), contaminantManager.size()));
                doc.getDataTable().setContaminants(contaminantManager.getTaxonIdsString());
                doc.setUseContaminantFilter(contaminantManager.size() > 0);
            }

            if (!processInPairs)
                createRMA6FileFromDAA("DAA2RMA6", daaFiles[i], outputFiles[iOutput], useCompression, doc, maxMatchesPerRead, progressListener);
            else
                createRMA6FileFromDAAPair("DAA2RMA6", daaFiles[i], daaFiles[i + 1], outputFiles[iOutput], useCompression, doc, maxMatchesPerRead, progressListener);

            progressListener.close();

            final RMA6Connector connector = new RMA6Connector(outputFiles[iOutput]);

            if (metaDataFiles.length > 0) {
                System.err.println("Saving metadata:");
                // If only one metadata file was given, it is reused for every output file
                final String metaDataFile = metaDataFiles[Math.min(iOutput, metaDataFiles.length - 1)];
                // try-with-resources so that the reader is closed even if parsing fails
                try (FileReader reader = new FileReader(metaDataFile)) {
                    SampleAttributeTable sampleAttributeTable = new SampleAttributeTable();
                    sampleAttributeTable.read(reader,
                            Collections.singletonList(Basic.getFileBaseName(Basic.getFileNameWithoutPath(outputFiles[iOutput]))), false);
                    Map<String, byte[]> label2data = new HashMap<>();
                    label2data.put(SampleAttributeTable.SAMPLE_ATTRIBUTES, sampleAttributeTable.getBytes());
                    connector.putAuxiliaryData(label2data);
                    System.err.println("done");
                } catch (Exception ex) {
                    // best effort: a metadata problem is reported, but does not abort the conversion
                    Basic.caught(ex);
                }
            }
            // NOTE(review): this is called after progressListener.close() above - confirm that this is intended
            progressListener.incrementProgress();
        }
    }

    /**
     * Determines the RMA6 output file for each input file (or each pair of input files).
     * <p>
     * If exactly one output is given and it is an existing directory, the output files are placed inside that
     * directory and named after the (first) input file of each unit. Otherwise the given output files are used
     * as they are, after checking that their number fits the number of inputs.
     *
     * @param daaFiles       the input DAA files; must contain an even number of files if processInPairs is set
     * @param outputFiles    the output locations as given on the command line
     * @param processInPairs true, if two consecutive input files together produce one output file
     * @return one output file path per input file, or per pair of input files
     * @throws IOException if several outputs are required but the single given output is not a directory,
     *                     or if the numbers of input and output files do not match
     */
    private static String[] resolveOutputFiles(String[] daaFiles, String[] outputFiles, boolean processInPairs) throws IOException {
        if (outputFiles.length != 1) {
            if ((!processInPairs && daaFiles.length != outputFiles.length) || (processInPairs && daaFiles.length != 2 * outputFiles.length))
                throw new IOException("Number of input and output files do not match");
            return outputFiles;
        }

        final String output = outputFiles[0];
        final boolean outputIsDirectory = (new File(output)).isDirectory();

        // a single file, or a single pair of files, produces exactly one output file
        if (daaFiles.length == 1 || (processInPairs && daaFiles.length == 2)) {
            if (outputIsDirectory)
                return new String[]{rma6FileInDirectory(output, daaFiles[0])};
            return outputFiles;
        }

        if (daaFiles.length > 1) {
            if (!outputIsDirectory)
                throw new IOException("Multiple files given, but given single output is not a directory");
            // write into the given directory itself (not into its parent), consistent with the single-file case
            final int step = (processInPairs ? 2 : 1);
            final String[] result = new String[daaFiles.length / step];
            for (int i = 0; i < daaFiles.length; i += step)
                result[i / step] = rma6FileInDirectory(output, daaFiles[i]);
            return result;
        }
        return outputFiles;
    }

    /**
     * Builds the path of the RMA6 file inside the given directory whose name is derived from the given DAA file
     * by dropping the path and any zip/gzip suffix and replacing the file suffix by ".rma6".
     *
     * @param directory the output directory
     * @param daaFile   the input DAA file
     * @return path of the output file
     */
    private static String rma6FileInDirectory(String directory, String daaFile) {
        final String name = Basic.getFileNameWithoutZipOrGZipSuffix(Basic.getFileNameWithoutPath(daaFile));
        return (new File(directory, Basic.replaceFileSuffix(name, ".rma6"))).getPath();
    }

    /**
     * Creates an RMA6 file from a single DAA file.
     *
     * @param creator           name of the creating program, handed on to the RMA6 creator
     * @param daaFile           the input DAA file
     * @param rma6FileName      the RMA6 file to write
     * @param useCompression    handed on to the RMA6 creator
     * @param doc               the document; its blast mode is used and it is handed on to the RMA6 creator
     * @param maxMatchesPerRead handed on to the RMA6 creator
     * @param progressListener  progress listener used while parsing the input
     * @throws IOException       propagated from the RMA6 creator
     * @throws CanceledException propagated from the RMA6 creator
     * @throws SQLException      propagated from the RMA6 creator
     */
    private static void createRMA6FileFromDAA(String creator, String daaFile, String rma6FileName, boolean useCompression, Document doc,
                                              int maxMatchesPerRead, ProgressListener progressListener) throws IOException, CanceledException, SQLException {
        final String[] inputFiles = {daaFile};
        // second file array is left empty (presumably the separate reads files - verify against RMA6FromBlastCreator)
        final String[] noFiles = {};
        new RMA6FromBlastCreator(creator, BlastFileFormat.DAA, doc.getBlastMode(), inputFiles, noFiles, rma6FileName, useCompression, doc, maxMatchesPerRead)
                .parseFiles(progressListener);
    }

    /**
     * Creates one RMA6 file from a pair of DAA files.
     *
     * @param creator           name of the creating program, handed on to the RMA6 creator
     * @param daaFile1          the first input DAA file of the pair
     * @param daaFile2          the second input DAA file of the pair
     * @param rma6FileName      the RMA6 file to write
     * @param useCompression    handed on to the RMA6 creator
     * @param doc               the document; its blast mode is used and it is handed on to the RMA6 creator
     * @param maxMatchesPerRead handed on to the RMA6 creator
     * @param progressListener  progress listener used while parsing the input
     * @throws IOException       propagated from the RMA6 creator
     * @throws CanceledException propagated from the RMA6 creator
     * @throws SQLException      propagated from the RMA6 creator
     */
    private static void createRMA6FileFromDAAPair(String creator, String daaFile1, String daaFile2, String rma6FileName, boolean useCompression, Document doc,
                                                  int maxMatchesPerRead, ProgressListener progressListener) throws IOException, CanceledException, SQLException {
        final String[] pairOfFiles = new String[2];
        pairOfFiles[0] = daaFile1;
        pairOfFiles[1] = daaFile2;
        // second file array is left empty (presumably the separate reads files - verify against RMA6FromBlastCreator)
        final String[] noFiles = new String[0];

        final RMA6FromBlastCreator fromBlastCreator = new RMA6FromBlastCreator(creator, BlastFileFormat.DAA, doc.getBlastMode(),
                pairOfFiles, noFiles, rma6FileName, useCompression, doc, maxMatchesPerRead);
        fromBlastCreator.parseFiles(progressListener);
    }
}
