#include <getopt.h>
#include <pthread.h>
#include <sched.h>
#include <unistd.h>

#include <cassert>
#include <cerrno>
#include <csignal>
#include <cstdint>
#include <cstdlib>
#include <cstring>

#include <algorithm>
#include <array>
#include <condition_variable>
#include <fstream>
#include <future>
#include <iomanip>
#include <ios>
#include <iostream>
#include <iterator>
#include <mutex>
#include <queue>
#include <sstream>
#include <stdexcept>
#include <string>
#include <system_error>
#include <vector>

#include <enyx/hw/accelerator.hpp>
#include <enyx/hw/core.hpp>
#include <enyx/hw/core_tree.hpp>
#include <enyx/hw/mmio.hpp>

#include <enyx/cores/hardware_ids.hpp>

#include <enyx/cores/data_stream/hw_source.hpp>
#include <enyx/cores/probes/data_stream.hpp>
#include <enyx/cores/probes/collector.hpp>
#include <enyx/cores/probes/probe.hpp>
#include <enyx/cores/probes/system.hpp>
#include <enyx/cores/probes/timestamp.hpp>

namespace {

namespace h = enyx::hw;
namespace p = enyx::probes;

// Termination flag: zero at start-up (static storage), set to 1 by the
// SIGINT/SIGTERM handler installed in run(), which polls it to know
// when to stop.
volatile std::sig_atomic_t is_exit_requested;

// Command line options, as filled by parse_args().
struct arguments
{
    // Index of the accelerator to open (-a, --accelerator-id)
    std::uint32_t accelerator_id;
    // Index of the event collector to read from (-c, --collector-id)
    std::uint32_t collector_id;
    // CPU to bind the execution to (-p, --core);
    // -1 means that the affinity is left untouched
    int core;
    // File requested with -o, --output; when it is not open,
    // run() writes to the standard output instead
    std::ofstream output;
};

// Print the command line help on the standard output.
//
// name is the program name to display, usually argv[0].
void
usage(char const * name)
{
    std::cout << "Usage: " << name << " [OPTION]...\n"
            "Read data continuously from a collector.\n"
            "\n"
            "Optional arguments:\n"
            "  -a, --accelerator-id  Select the accelerator index (default: 0)\n"
            "  -c, --collector-id    Select the collector index (default: 0)\n"
            "  -p, --core            Bind the execution to a core\n"
            "  -o, --output          Write to a file instead of stdout\n"
            "  -h, --help            Display this help and exit\n"
            << std::flush;
}

// Parse text as a base-10 unsigned 32-bit integer.
//
// Returns true and stores the result into value on success. Returns false
// and leaves value untouched when text is empty, is negative, has trailing
// characters or does not fit into 32 bits.
bool
parse_unsigned_option(char const * text, std::uint32_t & value)
{
    // strtoul() accepts a leading '-' and negates the result,
    // whereas a decimal index never contains that character.
    if (std::strchr(text, '-') != nullptr)
        return false;

    errno = 0;
    char * end = nullptr;
    unsigned long const parsed = std::strtoul(text, &end, 10);
    if (errno != 0 || end == text || *end != '\0')
        return false;

    if (parsed != static_cast<std::uint32_t>(parsed))
        return false;

    value = static_cast<std::uint32_t>(parsed);
    return true;
}

// Parse text as a base-10 int.
//
// Returns true and stores the result into value on success. Returns false
// and leaves value untouched when text is empty, has trailing characters
// or does not fit into an int.
bool
parse_int_option(char const * text, int & value)
{
    errno = 0;
    char * end = nullptr;
    long const parsed = std::strtol(text, &end, 10);
    if (errno != 0 || end == text || *end != '\0')
        return false;

    if (parsed != static_cast<int>(parsed))
        return false;

    value = static_cast<int>(parsed);
    return true;
}

// Fill args from the command line.
//
// args.core is reset to -1 (no CPU binding) before parsing; the other
// fields are only written when the matching option is present.
//
// Returns 0 on success and -1 when an option is unknown, has an invalid
// value, when the output file can't be opened or when a positional
// argument is found. -h/--help prints the usage and terminates the
// process successfully, as callers only distinguish success from failure.
int
parse_args(int argc, char * argv[], arguments & args)
{
    struct option const options[] = {
        {"accelerator-id", required_argument, nullptr, 'a'},
        {"collector-id", required_argument, nullptr, 'c'},
        {"core", required_argument, nullptr, 'p'},
        {"output", required_argument, nullptr, 'o'},
        {"help", no_argument, nullptr, 'h'},
        {},
    };

    args.core = -1;
    int opt;
    while ((opt = ::getopt_long(argc, argv,
                                "a:c:p:o:h", options, nullptr)) != -1)
    {
        switch (opt)
        {
        case 'a':
            if (! parse_unsigned_option(optarg, args.accelerator_id))
            {
                std::cerr << "Invalid accelerator id \"" << optarg << "\".\n"
                          << std::flush;
                return -1;
            }
            break;
        case 'c':
            if (! parse_unsigned_option(optarg, args.collector_id))
            {
                std::cerr << "Invalid collector id \"" << optarg << "\".\n"
                          << std::flush;
                return -1;
            }
            break;
        case 'p':
            if (! parse_int_option(optarg, args.core))
            {
                std::cerr << "Invalid core \"" << optarg << "\".\n"
                          << std::flush;
                return -1;
            }
            break;
        case 'o':
            args.output = std::ofstream{optarg};
            if (! args.output)
            {
                std::cerr << "Failed to open " << optarg << std::endl;
                return  -1;
            }
            break;
        case 'h':
            // Asking for help is not an error
            usage(argv[0]);
            std::exit(EXIT_SUCCESS);

        default:
            usage(argv[0]);
            return -1;
        }
    }

    if (optind != argc)
    {
        std::cerr << "Unexpected argument \"" << argv[optind] << "\".\n"
                  << std::flush;
        return -1;
    }

    return 0;
}

// Pin the calling thread to a single CPU.
//
// cpu_id is the index of the CPU to bind to, or -1 to leave the current
// affinity untouched.
//
// Throws std::runtime_error when cpu_id is outside [0, CPU_SETSIZE) and
// std::system_error when pthread_setaffinity_np() rejects the request.
void
bind_to_cpu(int cpu_id)
{
    if (cpu_id == -1)
        return;

    // CPU_SET() is only defined for indexes within the set, so both
    // bounds are checked before touching it.
    if (cpu_id < 0 || cpu_id >= CPU_SETSIZE)
    {
        std::ostringstream error;
        error << "Invalid CPU " << cpu_id
              << ", expected a value in [0, " << CPU_SETSIZE << ")";
        throw std::runtime_error{error.str()};
    }

    cpu_set_t set;
    CPU_ZERO(&set);
    CPU_SET(cpu_id, &set);
    // pthread functions return the error number instead of setting errno
    int failure = ::pthread_setaffinity_np(::pthread_self(),
                                           sizeof(set), &set);
    if (failure)
    {
        std::ostringstream error;
        error << "Failed to bind to CPU " << cpu_id;
        throw std::system_error{failure, std::generic_category(), error.str()};
    }
}

// Write the elements of [i, e) to out from the last one to the first one,
// each of them converted to int and printed with a field width of 2.
//
// The numeric base and the fill character are those already set on out.
template<typename Iterator>
void
print_hex(std::ostream & out, Iterator i, Iterator e)
{
    while (e != i)
    {
        --e;
        out << std::setw(2) << int(*e);
    }
}

// Copy of one probe event, filled by fill_record() and formatted later
// by print_record().
struct record
{
    // Id of the probe that produced the event; print_record() uses it
    // as an index into its probes vector
    p::probe::id probe_id;
    // Raw timestamp bytes, decoded by print_record() according to the
    // probe's timestamp format
    std::array<std::uint8_t, 12> timestamp;
    // Raw custom info bytes; only the first custom_info_size bytes of
    // the probe's event format are printed
    std::array<std::uint8_t, 11> custom_info;
    // Raw reference id bytes; only the first reference_id_size bytes of
    // the probe's event format are printed
    std::array<std::uint8_t, 8> reference_id;
};

// Store into new_record the id of probe and a copy of event's payload.
//
// Each payload field is copied for the full capacity of its destination
// array in the record.
void
fill_record(record & new_record, p::probe const& probe, p::event const& event)
{
    new_record.probe_id = probe.get_id().value();

    auto & timestamp = new_record.timestamp;
    std::memcpy(timestamp.data(), event.timestamp, timestamp.size());

    auto & custom_info = new_record.custom_info;
    std::memcpy(custom_info.data(), event.custom_info, custom_info.size());

    auto & reference_id = new_record.reference_id;
    std::memcpy(reference_id.data(), event.reference_id, reference_id.size());
}

// Write new_record to out as one CSV line:
// probe id, reference id, SEC, NS, FNS and custom info columns.
//
// probes is indexed by the record's probe id to retrieve the event format.
// Throws std::out_of_range when that id is not a valid index and
// std::runtime_error when the timestamp format is not a known one.
void
print_record(std::ostream & out, record const& new_record,
             std::vector<p::probe> const& probes)
{
    // The emitting probe tells how many bytes of each field are printed
    // and how the timestamp is encoded
    auto const layout = probes.at(new_record.probe_id).get_event_format();

    // Probe id column, then the prefix of the reference id column
    out << "0x" << int(new_record.probe_id) << ",0x";

    auto const reference = new_record.reference_id.data();
    print_hex(out, reference, reference + layout.reference_id_size);

    // Timestamp columns, each one followed by the prefix of the next column
    bool const is_64_bits = layout.ts_format == ENYX_PROBES_TS_64_V1;
    bool const is_96_bits = layout.ts_format == ENYX_PROBES_TS_96_V1;
    if (! is_64_bits && ! is_96_bits)
        throw std::runtime_error{"Unknown timestamp format"};

    if (is_64_bits)
    {
        // This format prints a constant 0 in the SEC column
        auto const ts = p::ts_parse_64_v1(new_record.timestamp.data());
        out << ",0x0,0x" << ts.ns << ",0x" << ts.fns << ",0x";
    }
    else
    {
        auto const ts = p::ts_parse_96_v1(new_record.timestamp.data());
        out << ",0x" << ts.s << ",0x" << ts.ns << ",0x" << ts.fns << ",0x";
    }

    auto const custom = new_record.custom_info.data();
    print_hex(out, custom, custom + layout.custom_info_size);

    out << "\n";
}

// Print every queued record to output, oldest first, removing each one
// from records once it has been printed.
void
print_records(std::ostream & output, std::queue<record> & records,
             std::vector<p::probe> const& probes)
{
    while (! records.empty())
    {
        print_record(output, records.front(), probes);
        records.pop();
    }
}

// Print the events of the requested collector as CSV until SIGINT or
// SIGTERM is received.
//
// The calling thread polls the collector's DMA channel and accumulates
// records; a second thread formats and writes them to args.output when it
// is open, to the standard output otherwise.
//
// Throws std::runtime_error when the accelerator, mmio, collector, probes
// or DMA channel can't be found, or when the DMA channel reports an error.
// Errors raised by bind_to_cpu() and by the writer thread are propagated.
void
run(arguments & args)
{
    // Optionally pin current thread to a CPU
    bind_to_cpu(args.core);

    // This handler breaks the polling loop below; the default action is
    // restored for the signal so that a second one is not intercepted.
    auto on_exit = [] (int sig) {
        std::signal(sig, SIG_DFL);
        is_exit_requested = 1;
    };
    std::signal(SIGINT, on_exit);
    std::signal(SIGTERM, on_exit);

    // Construct a filter that request the accelerator
    h::filter const select_accelerator{h::index{args.accelerator_id}};

    // Find and instantiate the requested accelerator
    auto const accelerator_descriptors = h::enumerate_accelerators(select_accelerator);
    if (accelerator_descriptors.size() != 1)
    {
        std::ostringstream error;
        error << "Expecting exactly 1 accelerator, got "
              << accelerator_descriptors.size();
        throw std::runtime_error{error.str()};
    }
    h::accelerator accelerator{accelerator_descriptors[0]};

    // Find and instantiate the first mmio from the first accelerator
    h::filter const select_mmio{h::index{0}};
    auto const mmio_descriptors = accelerator.enumerate_mmios(select_mmio);
    if (mmio_descriptors.size() != 1)
    {
        std::ostringstream error;
        error << "Expecting exactly 1 mmio, got " << mmio_descriptors.size();
        throw std::runtime_error{error.str()};
    }
    h::mmio mmio{mmio_descriptors[0]};

    auto tree = h::enumerate_cores(mmio);
    auto root = tree.get_root();

    auto collectors = root.enumerate(enyx::hardware_ids::EVENT_COLLECTOR);
    if (collectors.empty())
        throw std::runtime_error{"Can't find any event collector"};

    if (args.collector_id >= collectors.size())
    {
        std::ostringstream error;
        error << "Collector " << args.collector_id << " is out of range";
        throw std::runtime_error{error.str()};
    }

    auto collector = enyx::probes::collector{collectors[args.collector_id]};

    auto mtg = enyx::probes::mtg{root};

    // Keep the probes attached to the selected collector; each one gets
    // as id its position in the probes vector.
    auto probe_cores = root.enumerate(enyx::hardware_ids::EVENT_PROBE);
    std::vector<enyx::probes::probe> probes;
    enyx::probes::probe::id i = 0;
    for (auto & probe_core: probe_cores) {
        auto probe = enyx::probes::probe{probe_core, i,
                                         mtg.get_ts_format().v()};
        if (probe.get_collector_name() == collector.get_name()) {
            probes.push_back(std::move(probe));
            i++;
        }
    }

    if (probes.empty())
    {
        std::ostringstream error;
        error << "Can't retrieve collector " << args.collector_id
              << "'s probe(s)";
        throw std::runtime_error{error.str()};
    }

    h::filter dma_filter{h::name{collector.get_stream_name()}};
    auto streams = accelerator.enumerate_a2c_streams(dma_filter);
    if (streams.size() != 1)
    {
        std::ostringstream error;
        error << "Found " << streams.size() << " DMA channels for collector "
              << args.collector_id << ", expected 1.";
        throw std::runtime_error{error.str()};
    }
    auto stream = h::a2c_stream{streams[0]};

    // Create queue to transfer records from reader thread to writer.
    // records_buffer and reader_done are only accessed with records_mutex
    // held; reader_done tells the writer that nothing more will be pushed.
    std::queue<record> records_buffer;
    std::condition_variable records_cond;
    std::mutex records_mutex;
    bool reader_done = false;

    // Tells the writer that the reader is over when destroyed. Setting the
    // flag under the mutex guarantees the writer either sees it before
    // waiting or is woken up by the notification: no wakeup can be lost.
    struct writer_release
    {
        std::mutex & mutex;
        std::condition_variable & cond;
        bool & done;

        ~writer_release()
        {
            {
                std::lock_guard<std::mutex> lock{mutex};
                done = true;
            }
            cond.notify_one();
        }
    };

    // Select the output
    std::ostream & output = args.output.is_open() ? args.output : std::cout;
    output << std::hex << std::setfill('0');

    // Create the writer thread
    auto writer = [&] {
        std::queue<record> records_to_write;

        for (;;)
        {
            bool last_batch;
            // Grab the reader thread's records
            {
                std::unique_lock<std::mutex> lock{records_mutex};
                records_cond.wait(lock, [&] {
                    return reader_done || ! records_buffer.empty();
                });
                std::swap(records_buffer, records_to_write);
                last_batch = reader_done;
            }

            // Format outside of the lock to keep the reader unblocked
            print_records(output, records_to_write, probes);

            if (last_batch)
                return;
        }
    };

    output << "PROBE_ID,REFERENCE_ID,SEC,NS,FNS,CUSTOM_INFO" << std::endl;
    auto writer_thread = std::async(std::launch::async, writer);

    std::queue<record> read_records;
    {
        // However this scope is left (exit request or exception), the
        // writer is released so that joining it can't block forever.
        writer_release const release{records_mutex, records_cond, reader_done};

        // on_event callback only pushes records into the read queue
        auto on_event = [&] (std::uint8_t probe_id, p::event const& event) {
            auto & probe = probes.at(probe_id);
            read_records.emplace();
            fill_record(read_records.back(), probe, event);
        };

        auto on_error = [&] () {
            throw std::runtime_error{"Received an error on collector DMA channel."};
        };

        p::source event_source{on_event, on_error};
        enyx::data_stream::hw_source hw_source{stream, event_source};

        // Number of polls between two attempts to hand records over
        constexpr std::size_t handover_period = 1024 * 1024;

        for (std::size_t poll = 0; ! is_exit_requested; ++poll)
        {
            // Move records to the read queue
            hw_source.poll_once();

            if (poll % handover_period != 0 || read_records.empty())
                continue;

            bool handed_over = false;
            {
                std::lock_guard<std::mutex> lock{records_mutex};
                // Only hand the read records over once the writer has
                // grabbed the previous series
                if (records_buffer.empty())
                {
                    std::swap(read_records, records_buffer);
                    handed_over = true;
                }
            }
            if (handed_over)
                records_cond.notify_one();
        }
    }

    // The writer drains records_buffer before returning; get() also
    // rethrows any exception raised while writing.
    writer_thread.get();

    // Print records not transferred to buffer by reader
    print_records(output, read_records, probes);

    output << std::flush;
}

} // anonymous namespace

// Entry point: parse the command line, then stream the collector events.
//
// Returns EXIT_SUCCESS when run() completes, EXIT_FAILURE when the
// arguments are rejected or when run() throws; in the latter case the
// error is reported on the standard error.
int main(int argc, char *argv[])
{
    arguments args{};
    if (parse_args(argc, argv, args) < 0)
        return EXIT_FAILURE;

    int status = EXIT_FAILURE;
    try
    {
        run(args);
        status = EXIT_SUCCESS;
    }
    catch(std::exception const& e)
    {
        std::cerr << e.what() << std::endl;
    }
    catch(...)
    {
        std::cerr << "An unknown error occurred" << std::endl;
    }

    return status;
}

