/**
 * \file src/input/mzidentml/mzidentmlreader.h
 * \date 24/11/2022
 * \author Olivier Langella
 * \brief new method to read mzIdentML XML files
 */


/*******************************************************************************
 * Copyright (c) 2022 Olivier Langella
 *<Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of i2MassChroQ.
 *
 *     i2MassChroQ is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     i2MassChroQ is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with i2MassChroQ.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
#pragma once

#include "pappsomspp/core/processing/xml/xmlstreamreaderinterface.h"
#include "pappsomspp/core/processing/uimonitor/uimonitorinterface.h"
#include "pappsomspp/core/processing/cbor/psm/psmproteinmap.h"
#include "pappsomspp/core/protein/protein.h"
#include "pappsomspp/core/peptide/peptide.h"

namespace pappso
{
namespace cbor
{
namespace psm
{


/**
 * @todo write docs
 */
class MzIdentMlReader : public pappso::XmlStreamReaderInterface
{
  public:
  /**
   * Default constructor
   */
  MzIdentMlReader(pappso::UiMonitorInterface *p_monitor,
                  pappso::cbor::CborStreamWriter *p_output,
                  const QFileInfo &mzident_file);
  /**
   * Destructor
   */
  virtual ~MzIdentMlReader();


  protected:
  virtual void readStream() override;


  private:
  bool readAnalysisSoftware();
  bool readSequenceCollectionItem();
  bool readAnalysisCollectionItem();
  bool readDataCollectionItem();
  void readDBSequence();
  void readPeptide();
  bool readPeptideEvidence();
  void readSpectrumIdentification();
  void readInputs();
  void readAnalysisData();
  bool readSearchDatabase();
  void readSpectraData();
  void readSpectrumIdentificationResult();
  void readAnalysisProtocolCollection();

  void finalDebrief();


  /** \def IdentificationEngine identification engine
   *
   */
  enum class IdentificationEngine : std::int8_t
  {
    unknown  = 0,      ///< X!Tandem
    XTandem  = 1,      ///< MS:1001476 X!Tandem was used to analyze the spectra.
    mascot   = 2,      ///< MS:1001207 The name of the Mascot search engine.
    peptider = 3,      ///< peptider
    OMSSA    = 4,      ///< MS:1001475 Open Mass Spectrometry Search Algorithm was used to
                       ///< analyze the spectra.
    SEQUEST = 5,       ///< MS:1001208 The name of the SEQUEST search engine.
    Comet   = 6,       ///< MS:1002251 Comet open-source sequence search engine developed
                       ///< at the University of Washington. PMID:23148064
    Morpheus     = 7,  ///< MS:1002661 "Morpheus search engine." [PMID:23323968]
    MSGFplus     = 8,  ///< MS:1002048 "MS-GF+ software used to analyze the spectra." [PSI:PI]
    SpecOMS      = 9,  ///< SpecOMS C++ implementation
    sage         = 10, ///< sage
    PEAKS_Studio = 11, ///< PEAKS Studio
  };


  struct CvParam
  {
    QString cvRef;
    QString accession;
    QString name;
    QString value;
    QString unitAccession;
    QString unitName;
    QString unitCvRef;

    QString toString() const;
  };

  struct Modification
  {
    double monoisotopicMassDelta;
    std::size_t location;
    CvParam cvParam;
  };

  struct MzidDBSequence
  {
    QString accession;
    QString searchDatabase_ref;
    QString sequence;
    QString description;
    std::shared_ptr<Protein> protein_sp;
    bool is_decoy;
    std::vector<CvParam> cvParamList;
  };

  struct MzidSearchDatabase
  {
    QString file;
  };

  struct MzidSpectraData
  {
    QString file;
    QString name;
  };
  struct MzidPeptideEvidence
  {
    ProteinSp protein;
    PeptideSp peptide;
    std::size_t start;
    std::size_t end;
    bool isDecoy;
  };

  struct UserParam
  {
    QString name;
    QString value;
    QString toString() const;
  };

  struct SpectrumIdentificationItem
  {
    unsigned int chargeState;
    double experimentalMassToCharge;
    double calculatedMassToCharge;
    PeptideSp peptide;
    std::vector<MzidPeptideEvidence> mzidPeptideEvidenceList;

    std::vector<CvParam> cvParamList;
    std::vector<UserParam> userParamList;
  };

  struct SpectrumIdentificationResult
  {
    QString id;
    QString spectrumID;
    // IdentificationMzIdentMlFileSp mzident_source_sp;
    // IdentificationGroup *identification_group_p;
    std::size_t scanNum;
    std::size_t spectrumIndex;
    bool isSpectrumIndex = false;
    double retentionTime;
    std::vector<SpectrumIdentificationItem> spectrumIdentificationItemList;

    std::vector<CvParam> cvParamList;
    std::vector<UserParam> userParamList;
  };

  CvParam readCvParam();

  UserParam readUserParam();


  void readSpectrumIdentificationItem(SpectrumIdentificationResult &spectrum_identification_result);

  //  void
  //  processSpectrumIdentificationItem(SpectrumIdentificationResult
  //  &spectrum_identification_result,
  //                                    const SpectrumIdentificationItem
  //                                    &spectrumIdentificationItem);

  void writeSpectrumIdentificationResult(
    const SpectrumIdentificationResult &spectrum_identificatio_result);


  bool writeTandemEval(const std::vector<CvParam> &cv_param_list);

  void
  writeSpectrumIdentificationItem(const SpectrumIdentificationItem &spectrum_identification_item);

  private:
  pappso::UiMonitorInterface *mp_monitor;
  // Project *mp_project;
  IdentificationEngine m_identificationEngine;
  QString m_analysisSoftwareVersion;
  pappso::cbor::CborStreamWriter *mp_cborWriter;
  PsmProteinMap m_proteinMap;


  /** @brief store association between xml ID and an identification engine
   */
  std::map<QString, IdentificationEngine> m_IdentificationEngineMap;


  /** @brief store association between xml ID and fasta files
   */
  std::map<QString, MzidSearchDatabase> m_mzidSearchDatabaseIdMap;


  /** @brief store association between xml ID and peptide sequence
   */
  std::map<QString, PeptideSp> m_PeptideIdMap;


  /** @brief store association between xml ID and peptide evidence
   */
  std::map<QString, MzidPeptideEvidence> m_MzidPeptideEvidenceIdMap;


  /** @brief store association between xml ID and SpectraData
   */
  std::map<QString, MzidSpectraData> m_mzidSpectraDataIdMap;

  /** @brief store association between xml ID and DBSequence
   */
  std::map<QString, MzidDBSequence> m_MzidDBSequenceIdMap;

  /** @brief associates database ref id to protein shared pointer
   * because the search database id is not described before the protein (silly
   * idea IMHO) we keep association of protein to database in this map until the
   * real search database definition appears We then have to reprocess each
   * protein to set the right fasta file pointer
   */
  std::map<QString, std::vector<ProteinSp>> m_searchDatabase_ref2proteinList;


  /** @brief store all identification results by spectra xml id
   */
  std::map<QString, std::vector<SpectrumIdentificationResult>>
    m_spectrumIdentificationResultBySpectraIdMap;

  QFileInfo m_mzidentFile;
  QCborMap m_cborParameterMap;
};
} // namespace psm
} // namespace cbor
} // namespace pappso
