import React, { useState } from 'react';
import {
    Box,
    Typography,
    TextField,
    Button,
    Snackbar,
    Link,
    Alert,
    useTheme
} from '@mui/material';
import ContentCopyIcon from '@mui/icons-material/ContentCopy';

function WebScraper() {
    const theme = useTheme();

    const scriptContent = `
// ==UserScript==
// @name     Scrape and POST Current Page (Tampermonkey)
// @version  1
// @grant    GM_xmlhttpRequest
// @grant    GM_setValue
// @grant    GM_getValue
// @connect  aissistantalpha.calmbeach-51448e60.eastus.azurecontainerapps.io
// @connect  localhost
// @run-at   document-idle
// @include  *
// @require  https://code.jquery.com/jquery-3.6.0.min.js
// ==/UserScript==
(function() {
    'use strict';

    /**
     * Searches a node and everything beneath it for an entry with a matching id.
     *
     * The node itself is checked first, then its shadow root (when it has one),
     * then each of its child nodes in order. The first match wins.
     *
     * @param {Node} obj - Node at which the search starts.
     * @param {string} id - Value compared against each visited node's id property.
     * @returns {?Node} The first matching node, or null when nothing matches.
     */
    function findElementByIdDeep(obj, id) {
        if (obj.id === id) {
            return obj;
        }

        const lightChildren = obj.childNodes;

        // Shadow content is searched before the regular children.
        const shadowMatch = obj.shadowRoot ? findElementByIdDeep(obj.shadowRoot, id) : null;
        if (shadowMatch) {
            return shadowMatch;
        }

        let index = 0;
        while (index < lightChildren.length) {
            const childMatch = findElementByIdDeep(lightChildren[index], id);
            if (childMatch) {
                return childMatch;
            }
            index += 1;
        }

        return null;
    }

    /**
     * Builds a detached HTML document that mirrors the given document, with the
     * content of every shadow root copied inline into its host element.
     *
     * Only element and text nodes are copied. Any other node type (comments,
     * doctypes, ...) is replaced by an empty fragment and so leaves no trace.
     * For a host element the shadow children are appended first, followed by
     * its regular children.
     *
     * @param {Document} originalDocument - Document whose documentElement is mirrored.
     * @returns {Document} A new document holding the flattened copy.
     */
    function recreateDocumentWithShadowDOM(originalDocument) {
        const recreatedDocument = document.implementation.createHTMLDocument();

        // Copies a single node (without its children) into the recreated document.
        function recreateNode(node) {
            if (node.nodeType === Node.ELEMENT_NODE) {
                const recreatedElement = recreatedDocument.createElement(node.tagName);

                Array.from(node.attributes).forEach(attr => {
                    try {
                        recreatedElement.setAttribute(attr.name, attr.value);
                    } catch (error) {
                        // The HTML parser accepts attribute names (e.g. "@click")
                        // that setAttribute rejects. Skip those instead of
                        // aborting the whole scrape.
                        console.debug('Skipping attribute the DOM API rejects: ', attr.name);
                    }
                });

                return recreatedElement;
            }

            if (node.nodeType === Node.TEXT_NODE) {
                return recreatedDocument.createTextNode(node.textContent);
            }

            // Unsupported node type: an empty fragment adds nothing when appended.
            return recreatedDocument.createDocumentFragment();
        }

        // Recreates every node of a child list under recreatedParent, then descends.
        function appendRecreatedChildren(sourceNodes, recreatedParent) {
            sourceNodes.forEach(sourceChild => {
                const recreatedChild = recreateNode(sourceChild);
                recreatedParent.appendChild(recreatedChild);
                extractShadowDOM(sourceChild, recreatedChild);
            });
        }

        // Copies the shadow content (if any) and then the regular children.
        function extractShadowDOM(originalNode, recreatedNode) {
            if (originalNode.shadowRoot) {
                appendRecreatedChildren(originalNode.shadowRoot.childNodes, recreatedNode);
            }
            appendRecreatedChildren(originalNode.childNodes, recreatedNode);
        }

        // createHTMLDocument() already supplies an empty head and body. Remove
        // them so the copy does not end up with two head/body pairs.
        const recreatedRoot = recreatedDocument.documentElement;
        while (recreatedRoot.firstChild) {
            recreatedRoot.removeChild(recreatedRoot.firstChild);
        }

        extractShadowDOM(originalDocument.documentElement, recreatedRoot);

        return recreatedDocument;
    }

    // Function to add auto-incremented IDs to elements, including elements in shadow DOMs
    /**
     * Gives every element below the given node that has no id a numeric one.
     *
     * Ids are assigned from a running counter in document order. For a host
     * element the host is numbered first, then the content of its shadow root,
     * then its regular children. Elements that already have an id keep it.
     *
     * The node may be an element, a document or a document fragment (such as a
     * shadow root). Earlier only elements were processed, so calling this with
     * the document or with a shadow root did nothing at all.
     *
     * @param {Node} node - Root of the subtree to process.
     * @param {number} [idCounter=0] - First id value to hand out.
     * @returns {number} The next unused counter value.
     */
    function addAutoIncrementedIds(node, idCounter = 0) {
        function processNode(current) {
            const isElement = current.nodeType === Node.ELEMENT_NODE;
            const isContainer = current.nodeType === Node.DOCUMENT_NODE ||
                current.nodeType === Node.DOCUMENT_FRAGMENT_NODE;

            if (!isElement && !isContainer) {
                return;
            }

            if (isElement) {
                if (!current.id) {
                    current.id = String(idCounter);
                    idCounter++;
                }

                // Number the shadow content before the regular children.
                if (current.shadowRoot) {
                    processNode(current.shadowRoot);
                }
            }

            for (const child of current.children) {
                processNode(child);
            }
        }

        processNode(node);
        return idCounter;
    }

    /**
     * Scrapes the current page into the payload sent to the ingest endpoint.
     *
     * Steps: give elements ids, flag hidden live elements, recreate the page
     * (shadow DOM included) in a detached document, prune that copy (script UI,
     * comments, hidden elements, script/style/svg/meta tags, non-whitelisted
     * attributes) and finally reduce its markup to plain text.
     *
     * @returns {Object} Payload with the page text in content, the page URL in
     *     the "reference" attribute and the document title as name.
     */
    function scrapeCurrentPage() {
        // Temporary attribute that carries visibility information from the live
        // page into the recreated document. The recreated document is never
        // rendered, so getComputedStyle reports nothing for its elements and the
        // check has to happen on the live elements instead.
        const HIDDEN_MARKER = 'data-collection-hidden';

        // Flags every hidden live element below root, descending into shadow roots.
        function markHiddenElements(root, marked) {
            root.querySelectorAll('*').forEach(function(element) {
                const computedStyle = window.getComputedStyle(element);
                if (computedStyle.display === 'none' || computedStyle.visibility === 'hidden') {
                    element.setAttribute(HIDDEN_MARKER, '');
                    marked.push(element);
                }
                if (element.shadowRoot) {
                    markHiddenElements(element.shadowRoot, marked);
                }
            });
        }

        // Detaches every listed element. Works on a snapshot so live collections
        // can be passed in without skipping entries.
        function removeAll(elements) {
            Array.from(elements).forEach(function(element) {
                if (element.parentNode) {
                    element.parentNode.removeChild(element);
                }
            });
        }

        // Removes comment nodes from node and from all elements below it.
        function removeComments(node) {
            Array.from(node.childNodes).forEach(function(child) {
                if (child.nodeType === Node.COMMENT_NODE) {
                    node.removeChild(child);
                } else if (child.nodeType === Node.ELEMENT_NODE) {
                    removeComments(child);
                }
            });
        }

        // Reduces serialized HTML to whitespace-normalized plain text.
        function stripHTMLAndJS(html) {
            return html
                // Script blocks go first: once the tags are stripped there is
                // nothing left for this pattern to match.
                .replace(/<script\\b[^>]*>.*?<[/]script>/gis, ' ')
                // Remaining tags become spaces so neighbouring text stays apart.
                .replace(/<[^>]*>?/gm, ' ')
                // Undo the entity escaping added by HTML serialization. The
                // ampersand is decoded last so "&amp;lt;" ends up as "&lt;".
                .replace(/&nbsp;/g, ' ')
                .replace(/&lt;/g, '<')
                .replace(/&gt;/g, '>')
                .replace(/&amp;/g, '&')
                // Line breaks and runs of spaces collapse to a single space.
                .replace(/(\\r\\n|\\r|\\n)+/g, ' ')
                .replace(/ +/g, ' ')
                .trim();
        }

        addAutoIncrementedIds(document);

        const url = window.location.href;

        const markedElements = [];
        let clonedDocument;
        try {
            markHiddenElements(document, markedElements);
            clonedDocument = recreateDocumentWithShadowDOM(document);
        } finally {
            // Always restore the live page, even when recreating it failed.
            markedElements.forEach(function(element) {
                element.removeAttribute(HIDDEN_MARKER);
            });
        }

        // Elements carrying these classes (the script's own UI) are dropped.
        const classesToRemove = ['collection-internal'];
        classesToRemove.forEach(function(className) {
            removeAll(clonedDocument.getElementsByClassName(className));
        });

        removeComments(clonedDocument);

        // Drop everything that was not visible on the live page. The root
        // element is kept so the document can still be serialized.
        const hiddenElements = Array.from(clonedDocument.querySelectorAll('[' + HIDDEN_MARKER + ']'))
            .filter(function(element) {
                return element !== clonedDocument.documentElement;
            });
        removeAll(hiddenElements);

        // Tags to unwrap: the tag goes, its children stay in place.
        const tagsToRemoveButKeepChildren = [];
        tagsToRemoveButKeepChildren.forEach(function(tagName) {
            // Snapshot first: tags with a click handler are kept, which would
            // otherwise leave them at the front of the live collection forever.
            Array.from(clonedDocument.getElementsByTagName(tagName)).forEach(function(tag) {
                if (tag.onclick) {
                    return;
                }
                const parent = tag.parentNode;
                Array.from(tag.childNodes).forEach(function(child) {
                    parent.insertBefore(child, tag);
                });
                parent.removeChild(tag);
            });
        });

        // Tags removed together with their content.
        const tagsToRemove = ['script', 'style', 'svg', 'meta'];
        tagsToRemove.forEach(function(tagName) {
            removeAll(clonedDocument.getElementsByTagName(tagName));
        });

        // Only these attributes survive on the remaining elements.
        const whitelistAttributes = ['type', 'id', 'name', 'jsname'];
        Array.from(clonedDocument.getElementsByTagName('*')).forEach(function(element) {
            Array.from(element.attributes).forEach(function(attr) {
                if (!whitelistAttributes.includes(attr.name)) {
                    element.removeAttribute(attr.name);
                }
            });
        });

        console.log('Cloned document: ', clonedDocument);

        const content = stripHTMLAndJS(clonedDocument.documentElement.outerHTML);
        console.log(content);

        return {
            associations: [],
            types: ["Source"],
            attributes: [{"name":"reference", "value":url, "type":"string"}],
            content: content,
            description:"Content scraped from " + url,
            name: document.title,
            contentType: "text/plain",
            runDataPreperation: true
        };
    }

    /**
     * Reads the JWT kept in the userscript manager's value storage.
     *
     * @returns {*} Whatever GM_getValue yields for the 'jwtToken' key, with
     *     null passed as the fallback value.
     */
    function getJWTToken() {
        const storedToken = GM_getValue('jwtToken', null);
        return storedToken;
    }

    /**
     * Saves the JWT in the userscript manager's value storage.
     *
     * @param {string} token - Token stored under the 'jwtToken' key.
     */
    function setJWTToken(token) {
        const storageKey = 'jwtToken';
        GM_setValue(storageKey, token);
    }

    /**
     * Forgets the saved JWT by overwriting the 'jwtToken' entry with null.
     */
    function clearJWTToken() {
        const storageKey = 'jwtToken';
        GM_setValue(storageKey, null);
    }

    // Parts of the login overlay currently on screen, or null while none is shown.
    let activeLoginForm = null;

    /**
     * Takes the login overlay off the page, if one is showing.
     */
    function removeLoginForm() {
        if (!activeLoginForm) {
            return;
        }
        activeLoginForm.overlay.remove();
        activeLoginForm = null;
    }

    /**
     * Shows a centered login overlay with username and password fields.
     *
     * Calling it while an overlay is already showing does nothing, so repeated
     * triggers cannot stack several forms. The fields are kept as references
     * rather than looked up by id, because ids such as "username" may already
     * belong to the host page.
     */
    function showLoginForm() {
        if (activeLoginForm) {
            return;
        }

        const overlay = document.createElement('div');
        // Same class as the other script UI, which scrapeCurrentPage removes
        // from the recreated document.
        overlay.className = 'collection-internal';
        overlay.style.cssText = 'position: fixed; top: 50%; left: 50%; transform: translate(-50%, -50%); background-color: white; padding: 20px; border: 1px solid black; z-index: 10000;';

        const heading = document.createElement('h2');
        heading.textContent = 'Login';

        const usernameInput = document.createElement('input');
        usernameInput.type = 'text';
        usernameInput.id = 'collection-login-username';
        usernameInput.placeholder = 'Username';
        usernameInput.style.cssText = 'display: block; margin-bottom: 10px;';

        const passwordInput = document.createElement('input');
        passwordInput.type = 'password';
        passwordInput.id = 'collection-login-password';
        passwordInput.placeholder = 'Password';
        passwordInput.style.cssText = 'display: block; margin-bottom: 10px;';

        const loginButton = document.createElement('button');
        loginButton.id = 'collection-login-button';
        loginButton.textContent = 'Login';
        loginButton.addEventListener('click', handleLogin);

        overlay.appendChild(heading);
        overlay.appendChild(usernameInput);
        overlay.appendChild(passwordInput);
        overlay.appendChild(loginButton);
        document.body.appendChild(overlay);

        activeLoginForm = { overlay, usernameInput, passwordInput };
    }

    /**
     * Sends the credentials from the login overlay to the auth endpoint.
     *
     * On HTTP 200 with an accessToken in the JSON body the token is stored, the
     * overlay is removed and the collect flow continues through
     * handleButtonClick. Every other outcome is reported with an alert and
     * details on the console. The password and the raw login response (which
     * carries the token) are never written to the console.
     */
    function handleLogin() {
        if (!activeLoginForm) {
            return;
        }

        const username = activeLoginForm.usernameInput.value;
        const password = activeLoginForm.passwordInput.value;

        const loginRequest = { username, password };
        console.log("Sending login request for user:", username);

        GM_xmlhttpRequest({
            method: "POST",
            url: "https://aissistantalpha.calmbeach-51448e60.eastus.azurecontainerapps.io/api/auth/login",
            data: JSON.stringify(loginRequest),
            headers: {
                "Content-Type": "application/json"
            },
            onload: function(response) {
                console.log("Login response status:", response.status);

                if (response.status !== 200) {
                    // The error body is logged as-is. It need not be JSON, so
                    // it is not parsed here.
                    console.error("Login failed. Status:", response.status);
                    console.error("Response text:", response.responseText);
                    alert("Login failed. Status: " + response.status + ". Please check the console for details.");
                    return;
                }

                let token;
                try {
                    token = JSON.parse(response.responseText).accessToken;
                } catch (error) {
                    console.error("Error parsing response:", error);
                    alert("Login failed. Unexpected response format.");
                    return;
                }

                if (!token) {
                    alert("Login successful, but no token received. Please check the response format.");
                    return;
                }

                setJWTToken(token);
                removeLoginForm();
                handleButtonClick(); // Proceed with collection
            },
            onerror: function(error) {
                console.error("Error during login:", error);
                alert("Login failed. Error: " + (error.statusText || "Unknown error") + ". Please check the console for more details.");
            }
        });
    }

    /**
     * Posts scraped page data to the ingest endpoint with the stored JWT.
     *
     * Despite the name, the request goes to the hosted endpoint below.
     *
     * - Without a stored token the login form is shown and nothing is sent.
     * - Any 2xx answer counts as success (the endpoint is called with
     *   async=true, so it need not answer with exactly 200): the widget then
     *   shows only the green check.
     * - 401 clears the stored token and shows the login form again.
     * - All other failures are logged to the console.
     *
     * The submit button is put back into its idle state on every path.
     *
     * @param {Object} data - Payload to send; serialized with JSON.stringify.
     */
    function postDataToLocalhost(data) {
        try {
            console.log("Preparing to send POST request with data:", data);

            if (!data || typeof data !== 'object') {
                throw new Error('Invalid data format');
            }

            const jsonData = JSON.stringify(data);
            const jwtToken = getJWTToken();

            if (!jwtToken) {
                console.error("No JWT token available. Please log in.");
                // Nothing is in flight, so the button must not stay in its
                // "submitting" state.
                resetSubmitButton();
                showLoginForm();
                return;
            }

            GM_xmlhttpRequest({
                method: "POST",
                url: "https://aissistantalpha.calmbeach-51448e60.eastus.azurecontainerapps.io/api/aissistant/ingest/content?async=true",
                data: jsonData,
                headers: {
                    "Content-Type": "application/json",
                    "Authorization": "Bearer " + jwtToken
                },
                onload: function(response) {
                    console.log("Full response:", response);

                    const succeeded = response.status >= 200 && response.status < 300;
                    if (succeeded) {
                        console.log("POST request sent successfully!");
                        console.log("Response text:", response.responseText);

                        // Hide all buttons except the success check
                        button.style.display = 'none';
                        submitButton.style.display = 'none';

                        // Replace red X with green check
                        closeButton.style.display = 'none';
                        successCheck.style.display = 'block';
                    } else if (response.status === 401) {
                        console.error("Unauthorized. Token might be expired.");
                        clearJWTToken();
                        showLoginForm();
                    } else {
                        console.error("Error sending POST request. Status:", response.status);
                        console.error("Response text:", response.responseText);
                    }

                    resetSubmitButton();
                },
                onerror: function(error) {
                    console.error("Error sending POST request:", error);
                    console.error("Error details:", JSON.stringify(error, null, 2));

                    resetSubmitButton();
                }
            });
        } catch (error) {
            console.error("Caught error in postDataToLocalhost:", error);

            resetSubmitButton();
        }
    }

    /**
     * Puts the submit button back into its idle state: stops the dot animation,
     * re-enables the button, restores its label and removes the element with
     * the id 'loading-dots' if one is still in the document.
     */
    function resetSubmitButton() {
        const pendingAnimation = submitButton.loadingInterval;
        if (pendingAnimation) {
            clearInterval(pendingAnimation);
            submitButton.loadingInterval = null;
        }

        submitButton.disabled = false;
        submitButton.style.cursor = 'pointer';
        submitButton.textContent = 'Submit';

        const leftoverDots = document.getElementById('loading-dots');
        if (leftoverDots) {
            leftoverDots.remove();
        }
    }

    /**
     * Click handler for the 'Collect' button.
     *
     * With a stored token the 'Collect' button is swapped for the 'Submit'
     * button; without one the login form is shown instead.
     */
    function handleButtonClick() {
        const hasToken = Boolean(getJWTToken());

        if (hasToken) {
            button.style.display = 'none';
            submitButton.style.display = 'block';
            return;
        }

        showLoginForm();
    }

    /**
     * Click handler that hides the whole widget container.
     */
    function handleCloseClick() {
        const widgetStyle = containerDiv.style;
        widgetStyle.display = 'none';
    }

    /**
     * Click handler for the 'Submit' button: scrapes the page, switches the
     * button to its loading state and then posts the scraped data.
     *
     * The loading state is applied before posting. postDataToLocalhost resets
     * the button itself when it fails synchronously, and applying the loading
     * state afterwards (as was done before) overwrote that reset and left the
     * button disabled for good.
     */
    function handleSubmitClick() {
        // Scrape first so the page is captured before the button changes.
        const scrapedData = scrapeCurrentPage();

        // Disable the button and show the loading indicator.
        submitButton.disabled = true;
        submitButton.style.cursor = 'not-allowed';
        submitButton.textContent = 'Submitting...';

        const loadingDots = document.createElement('span');
        loadingDots.id = 'loading-dots';
        loadingDots.textContent = '';
        submitButton.appendChild(loadingDots);

        animateLoadingDots();

        // Post last so any reset it performs is the final word on the button.
        postDataToLocalhost(scrapedData);
    }

    /**
     * Starts the loading animation on the element with the id 'loading-dots'.
     *
     * Every 500 ms its text advances through one, two and three dots and then
     * back to empty. The interval id is stored on submitButton.loadingInterval
     * so the animation can be stopped later.
     */
    function animateLoadingDots() {
        const MAX_DOTS = 3;
        const dotsElement = document.getElementById('loading-dots');
        let visibleDots = 0;

        const intervalId = setInterval(function() {
            visibleDots = (visibleDots + 1) % (MAX_DOTS + 1);
            dotsElement.textContent = '.'.repeat(visibleDots);
        }, 500);

        submitButton.loadingInterval = intervalId;
    }

    /**
     * Creates one button of the floating widget.
     *
     * @param {string} label - Text shown on the button.
     * @param {Object} styles - Inline style properties applied to the button.
     * @param {Function} onClick - Handler registered for the click event.
     * @returns {HTMLButtonElement} The configured button, not yet attached.
     */
    function createWidgetButton(label, styles, onClick) {
        const element = document.createElement('button');
        // Marks the button as script UI. scrapeCurrentPage removes everything
        // with this class, which keeps widget labels out of the scraped text.
        element.className = 'collection-internal';
        element.textContent = label;
        Object.assign(element.style, styles);
        element.addEventListener('click', onClick);
        return element;
    }

    // First step: asks for a login when needed, otherwise reveals 'Submit'.
    const button = createWidgetButton('Collect', {
        backgroundColor: 'black',
        color: 'white',
        border: '2px solid white',
        padding: '5px 10px',
        cursor: 'pointer'
    }, handleButtonClick);
    button.id = 'collection-submit';

    // Red X that hides the widget.
    const closeButton = createWidgetButton('X', {
        backgroundColor: 'red',
        color: 'white',
        border: 'none',
        padding: '5px 10px',
        borderRadius: '4px',
        marginLeft: '10px'
    }, handleCloseClick);
    closeButton.id = 'collection-close';

    // Second step: scrapes and posts the page. Hidden until 'Collect' is clicked.
    const submitButton = createWidgetButton('Submit', {
        display: 'none',
        backgroundColor: 'black',
        color: 'white',
        border: '2px solid white',
        padding: '5px 10px',
        cursor: 'pointer'
    }, handleSubmitClick);

    // Green check shown after a successful post. Clicking it hides the widget.
    const successCheck = createWidgetButton('✓', {
        display: 'none',
        backgroundColor: 'green',
        color: 'white',
        border: 'none',
        padding: '5px 10px',
        borderRadius: '4px',
        cursor: 'pointer'
    }, handleCloseClick);

    // Fixed container in the lower right corner that holds all four buttons.
    const containerDiv = document.createElement('div');
    containerDiv.className = 'collection-internal';
    Object.assign(containerDiv.style, {
        position: 'fixed',
        bottom: '10px',
        right: '10px',
        zIndex: '9999',
        display: 'flex',
        alignItems: 'center'
    });

    containerDiv.appendChild(button);
    containerDiv.appendChild(submitButton);
    containerDiv.appendChild(closeButton);
    containerDiv.appendChild(successCheck);
    document.body.appendChild(containerDiv);
})();
    `
    const [openSnackbar, setOpenSnackbar] = useState(false);

    /**
     * Copies the userscript to the clipboard and opens the confirmation
     * snackbar once the write has succeeded.
     *
     * The Clipboard API is missing outside secure contexts and the write can be
     * rejected (for example when permission is denied). Both cases are logged
     * instead of surfacing as an uncaught error or an unhandled rejection.
     */
    const handleCopyToClipboard = () => {
        if (!navigator.clipboard?.writeText) {
            console.error('Clipboard API is not available; the script could not be copied.');
            return;
        }

        navigator.clipboard.writeText(scriptContent)
            .then(() => {
                setOpenSnackbar(true);
            })
            .catch((error) => {
                console.error('Failed to copy the script to the clipboard:', error);
            });
    };

    /**
     * Close handler for the confirmation snackbar.
     *
     * Close requests with the reason 'clickaway' are ignored; every other
     * request closes the snackbar.
     *
     * @param {object} event - Event passed along by the snackbar (unused).
     * @param {string} reason - Why the close was requested.
     */
    const handleCloseSnackbar = (event, reason) => {
        const shouldClose = reason !== 'clickaway';
        if (shouldClose) {
            setOpenSnackbar(false);
        }
    };

    return (
        <Box
            sx={{
                width: '100vw',
                height: '100vh',
                display: 'flex',
                flexDirection: 'column',
                overflow: 'hidden',
                padding: '20px',
                boxSizing: 'border-box'
            }}
        >
            <Typography variant="h4" component="h1" gutterBottom>
                Script Content
            </Typography>

            <Typography component="h1" gutterBottom>
                First install <Link href="https://www.tampermonkey.net/" target="_blank" rel="noopener">TamperMonkey</Link> in your browser and add this script.
                <br/><br/>
                It will first ask you to log in to obtain an access token to save content as a Source. Once it has the token, it will simply submit.
                <br/><br/>
                <Alert
                    severity="warning"
                    sx={{
                        mb: 2,
                        backgroundColor: theme.palette.warning.light,
                        color: theme.palette.warning.contrastText,
                        '& .MuiAlert-icon': {
                            color: theme.palette.warning.main,
                        },
                    }}
                >
                <strong>Note:  Make sure there is no private or sensitive information on the screen.  Some information will be persisted and also sent to LLM models.</strong>
                </Alert>
                It should display a black button on the lower right of pages you visit (and inner iframes). First click on 'Collect', then confirm to scrape the page by clicking on 'Submit', then WAIT for the green checkmark. (Async loading to come soon for faster submitting.)
            </Typography>

            <TextField
                fullWidth
                multiline
                rows={20}
                value={scriptContent}
                variant="outlined"
                InputProps={{
                    readOnly: true,
                }}
                sx={{ mb: 2, flexGrow: 0 }}
            />
            <Button
                variant="contained"
                startIcon={<ContentCopyIcon />}
                onClick={handleCopyToClipboard}
            >
                Copy to Clipboard
            </Button>
            <Snackbar
                open={openSnackbar}
                autoHideDuration={3000}
                onClose={handleCloseSnackbar}
                message="Copied to clipboard!"
            />
        </Box>
    );

}

export default WebScraper;
