Secure Login

Extracting Latest Sports News...

Loading content...

News Scraper

Enter URLs (one per line):


`; return embedsHtml; } // Function to extract the current category from the DOM function getCurrentCategoryFromDOM() { try { // Look for the active or current category element in the DOM const activeColumnHeader = document.querySelector('.column-header.active') || document.querySelector('.column-header.selected') || document.querySelector('.column-header'); // If found, extract the category text if (activeColumnHeader) { const categorySpan = activeColumnHeader.querySelector('span'); if (categorySpan) { const category = categorySpan.textContent.trim(); console.log(`Extracted category from DOM: ${category}`); return category; } } // If we can't find it directly in the DOM structure, // try to extract it from any element that has the category attribute const elementWithCategory = document.querySelector('[data-category]'); if (elementWithCategory) { const category = elementWithCategory.getAttribute('data-category'); console.log(`Extracted category from data attribute: ${category}`); return category; } // As a last resort, try to parse it from the URL if it might be there const urlParams = new URLSearchParams(window.location.search); const categoryFromUrl = urlParams.get('category'); if (categoryFromUrl) { console.log(`Extracted category from URL: ${categoryFromUrl}`); return categoryFromUrl; } console.log("Could not extract category from DOM, using fallback method"); return null; } catch (error) { console.error(`Error extracting category from DOM: ${error.message}`); return null; } } // Function to determine appropriate category based on DOM, falling back to content analysis function determineCategory(title, content, clickedElement) { // First try to get the category from the DOM const domCategory = getCurrentCategoryFromDOM(); if (domCategory && domCategory !== "Uncategorized") { return domCategory; } // If we have a clicked element, try to extract category from its parent structure if (clickedElement) { try { // Navigate up through parents to find the column header let currentElement = clickedElement; let depth = 0; const maxDepth = 5; // Avoid infinite loops while (currentElement && depth < maxDepth) { // Try to find a column header among the ancestors if (currentElement.classList.contains('column-header')) { const categorySpan = currentElement.querySelector('span'); if (categorySpan) { const category = categorySpan.textContent.trim(); console.log(`Found category from element parents: ${category}`); return category; } } // Check for a data-category attribute on this element if (currentElement.hasAttribute('data-category')) { const category = currentElement.getAttribute('data-category'); console.log(`Found category from data attribute: ${category}`); return category; } // Move up to the parent currentElement = currentElement.parentElement; depth++; } } catch (error) { console.error(`Error finding category from clicked element: ${error.message}`); } } // Fallback to keyword-based analysis if DOM extraction failed console.log("Falling back to keyword-based category detection"); // Define sports categories and their associated keywords const categoryKeywords = { "Football": ["football", "soccer", "premier league", "fifa", "uefa"], "NFL": ["nfl", "national football league", "quarterback", "touchdown"], "NBA": ["nba", "basketball", "lebron", "lakers", "celtics"], "NHL": ["nhl", "hockey", "stanley cup", "ice hockey"], "MLB": ["mlb", "baseball", "world series", "pitcher"], "Tennis": ["tennis", "grand slam", "wimbledon", "us open"], "Boxing": ["boxing", "boxer", "fight", "heavyweight"], "UFC": ["ufc", "mma", "mixed martial arts", "fighter", "octagon"], "F1": ["f1", "formula one", "formula 1", "grand prix", "racing"], "Golf": ["golf", "pga", "tiger woods", "masters"], "Cricket": ["cricket", "test match", "ipl", "t20"], "Rugby": ["rugby", "six nations"] }; // Combine title and content for analysis const combinedText = (title + " " + content).toLowerCase(); // Check for keywords and count occurrences const categoryCounts = {}; for (const [category, keywords] of Object.entries(categoryKeywords)) { categoryCounts[category] = 0; for (const keyword of keywords) { // Count occurrences of each keyword const regex = new RegExp(`\\b${keyword}\\b`, 'gi'); const matches = combinedText.match(regex); if (matches) { categoryCounts[category] += matches.length; } } } // Find category with highest count let maxCount = 0; let bestCategory = "Uncategorized"; for (const [category, count] of Object.entries(categoryCounts)) { if (count > maxCount) { maxCount = count; bestCategory = category; } } return bestCategory; } async function scrapeAndPostArticle(event, url) { event.preventDefault(); console.log("Scrape and post function called for URL:", url); if (!url) { console.error('No URL found to scrape'); return; } // Get the icon element and clicked element for category detection const iconElement = event.currentTarget.querySelector('i'); const clickedElement = event.currentTarget; const originalIconClass = iconElement.className; // Detect category by finding the closest parent with data-category attribute const categoryElement = clickedElement.closest('.news-column'); const category = categoryElement ? categoryElement.getAttribute('data-category') : 'Uncategorized'; // Show loading indicator - change to spinning icon console.log("Changing icon to loading spinner"); iconElement.className = 'fas fa-spinner fa-spin'; // Added fa-spin class for rotation // Call the scrape function console.log("Starting scraping process..."); try { const result = await scrapeArticleContent(url); console.log("Scraping completed successfully"); console.log('%c Article Scraped Successfully ', 'background: #28a745; color: white; padding: 5px; border-radius: 3px;'); console.log('%c Title ', 'background: #007bff; color: white; padding: 3px;', result.meta.title); console.log('%c Description ', 'background: #17a2b8; color: white; padding: 3px;', result.meta.description); console.log('%c Featured Image ', 'background: #fd7e14; color: white; padding: 3px;', result.meta.featuredImage || 'No featured image found'); console.log('%c Content ', 'background: #6c757d; color: white; padding: 3px;', result.content); // Start tweet search, content rewriting, and image processing in parallel const tweetPromise = searchTwitterForRelatedTweets(result.meta.title); const rewritePromise = rewriteWithDeepseek(result.content); const imagePromise = processAndUploadArticleImage(result.meta.featuredImage, result.meta.title); const [tweetIds, rewriteResult, imageResult] = await Promise.all([ tweetPromise, rewritePromise, imagePromise ]); // Display rewritten content results if (rewriteResult.success) { console.log('%c Content Rewritten Successfully ', 'background: #9c27b0; color: white; padding: 5px; border-radius: 3px;'); console.log('%c Original Content ', 'background: #6c757d; color: white; padding: 3px;', result.content); console.log('%c Rewritten Content ', 'background: #e91e63; color: white; padding: 3px;', rewriteResult.rewritten_content); } else { console.error('Rewriting failed:', rewriteResult.message); } // Display image processing results if (imageResult && imageResult.success) { console.log('%c Alternative Image Uploaded ', 'background: #28a745; color: white; padding: 5px; border-radius: 3px;'); console.log('%c Image URL ', 'background: #fd7e14; color: white; padding: 3px;', imageResult.url); console.log('%c Image ID ', 'background: #fd7e14; color: white; padding: 3px;', imageResult.id); } else { console.log('%c No Alternative Image Found ', 'background: #dc3545; color: white; padding: 5px; border-radius: 3px;'); } // Display tweet results if (tweetIds.length > 0) { console.log('%c Related Tweets Found ', 'background: #00bcd4; color: white; padding: 5px; border-radius: 3px;'); tweetIds.forEach((id, index) => { console.log(`%c Tweet ${index + 1} ID `, 'background: #ff9800; color: white; padding: 3px;', id); console.log(`Tweet URL: https://twitter.com/x/status/${id}`); }); } else { console.log('%c No Related Tweets Found ', 'background: #ff5722; color: white; padding: 5px; border-radius: 3px;'); } console.log(`%c Detected Category `, 'background: #9c27b0; color: white; padding: 3px;', `${category}`); // Post to WordPress const postResult = await postToWordPress({ title: result.meta.title, content: rewriteResult.success ? rewriteResult.rewritten_content : result.content, imageId: imageResult && imageResult.success ? imageResult.id : null, tweetIds: tweetIds, category: category, tags: result.meta.tags // Add tags here }); if (postResult.success) { console.log('%c WordPress Post Created Successfully ', 'background: #28a745; color: white; padding: 5px; border-radius: 3px;'); console.log('%c Post ID ', 'background: #007bff; color: white; padding: 3px;', postResult.postId); console.log('%c Post URL ', 'background: #007bff; color: white; padding: 3px;', postResult.postUrl); } else { console.error('WordPress post creation failed:', postResult.error); } // Return combined results return { original: result, rewritten: rewriteResult, tweets: tweetIds, image: imageResult, post: postResult }; } catch (error) { console.error('Process error:', error); } finally { // Reset the icon console.log("Resetting icon to original state"); iconElement.className = originalIconClass; } } // Function to scrape article content using the Firecrawl API async function scrapeArticleContent(url) { console.log("Inside scrapeArticleContent function"); // Replace with your actual API key const apiKey = 'fc-d8dbcc45575a4fcea54f31fd7109e809'; console.log("Using API key:", apiKey); // Initial extraction request const payload = { urls: [url], prompt: "Extract the full main content of the post from the specified URL, explicitly including all texts inside box-like containers, cards, or feature-boxes that contain icons, headings, descriptions, lists, paragraphs, or similar structured content. DO NOT skip any boxed content regardless of class names or nesting. Ensure text from elements such as
,

, ,

,

,

,
  • , or any other textual containers inside the boxes is fully captured, **including deeply nested
    elements**.", schema: { type: "object", properties: { post_content: {type: "string"}, meta_title: {type: "string"}, meta_description: {type: "string"}, featured_image_url: {type: "string"}, post_tags: {type: "array", items: {type: "string"}} }, required: ["post_content"] } }; try { console.log("Sending initial extraction request"); // Start extraction const extractResponse = await fetch('https://api.firecrawl.dev/v1/extract', { method: 'POST', headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` }, body: JSON.stringify(payload) }); console.log("Extract response status:", extractResponse.status); if (!extractResponse.ok) { throw new Error(`API error: ${extractResponse.status}`); } const extractData = await extractResponse.json(); console.log("Extract data received:", extractData); if (!extractData.id) { throw new Error('Extraction ID not found'); } const extractId = extractData.id; const statusUrl = `https://api.firecrawl.dev/v1/extract/${extractId}`; console.log("Status URL:", statusUrl); // Poll for results let status = "processing"; let maxRetries = 20; let retryCount = 0; let statusData = null; console.log("Starting polling loop"); while (status !== "completed" && retryCount < maxRetries) { // Wait before checking status console.log(`Waiting before retry ${retryCount + 1}/${maxRetries}`); await new Promise(resolve => setTimeout(resolve, 2000)); console.log("Checking extraction status"); const statusResponse = await fetch(statusUrl, { method: 'GET', headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${apiKey}` } }); console.log("Status response received:", statusResponse.status); if (!statusResponse.ok) { throw new Error(`Status check error: ${statusResponse.status}`); } statusData = await statusResponse.json(); console.log("Status data:", statusData); if (!statusData.status) { throw new Error('Status not found in response'); } status = statusData.status; console.log("Current status:", status); retryCount++; } console.log("Final status:", status); if (status === "completed" && statusData && statusData.data && statusData.data.post_content) { console.log("Extraction completed successfully"); return { url: url, error: false, content: statusData.data.post_content, meta: { title: statusData.data.meta_title || '', description: statusData.data.meta_description || '', featuredImage: statusData.data.featured_image_url || '', tags: statusData.data.post_tags || [] } }; } else { throw new Error('Extraction failed or timed out'); } } catch (error) { console.error('Error in scraping:', error); return { url: url, error: error.message, content: '', meta: { title: '', description: '', featuredImage: '', tags: [] } }; } } async function getModifyAndUploadImage(featuredImageUrl, postTitle, wpApiUrl, wpUsername, wpApplicationPassword) { console.log(`Finding an alternative image for title: "${postTitle}"`); try { // 1. Search for a new image based on the title using the PHP proxy const searchTerm = `${postTitle} sports`; console.log(`Searching for images with term: "${searchTerm}"`); const searchResponse = await fetch(`https://7newsbox.online/image_search_proxy.php?q=${encodeURIComponent(searchTerm)}`); if (!searchResponse.ok) { throw new Error(`Image search proxy failed: ${searchResponse.status}`); } const searchData = await searchResponse.json(); let imageResults = searchData.images_results || []; if (imageResults.length === 0) { throw new Error("No image results found"); } // 2. Filter out images from the original source to avoid copyright issues let usableResults = imageResults; if (featuredImageUrl) { const originalDomain = new URL(featuredImageUrl).hostname; const filteredResults = imageResults.filter(img => { const imgUrl = img.original || ""; return !imgUrl.includes(originalDomain); }); // If we have filtered results, use those; otherwise fall back to all results usableResults = filteredResults.length > 0 ? filteredResults : imageResults; } // 3. Select a random image from the first 10 results (or fewer if less available) const randomIndex = Math.floor(Math.random() * Math.min(10, usableResults.length)); const selectedImage = usableResults[randomIndex]; const newImageUrl = selectedImage.original; console.log(`Selected image URL: ${newImageUrl}`); // 4. Download the image const imageResponse = await fetch(newImageUrl); if (!imageResponse.ok) { throw new Error(`Failed to download image: ${imageResponse.status}`); } // Get the image as blob const imageBlob = await imageResponse.blob(); // 5. Modify the image using canvas const modifiedImageBlob = await modifyImage(imageBlob, postTitle); // 6. Upload to WordPress const uploadResult = await uploadToWordPress(modifiedImageBlob, postTitle, wpApiUrl, wpUsername, wpApplicationPassword); return uploadResult; } catch (error) { console.error(`Error processing image: ${error.message}`); return null; } } // Function to modify an image using canvas async function modifyImage(imageBlob, title) { return new Promise((resolve, reject) => { const img = new Image(); img.onload = () => { try { // Create canvas const canvas = document.createElement('canvas'); // Resize image to make it more unique (95-105% of original size) const resizeFactor = 0.95 + Math.random() * 0.1; // Random between 0.95 and 1.05 const newWidth = Math.floor(img.width * resizeFactor); const newHeight = Math.floor(img.height * resizeFactor); canvas.width = newWidth; canvas.height = newHeight; // Get context and draw image const ctx = canvas.getContext('2d'); // Apply slight adjustments to brightness/contrast ctx.filter = `brightness(${0.9 + Math.random() * 0.2}) contrast(${0.9 + Math.random() * 0.2})`; ctx.drawImage(img, 0, 0, newWidth, newHeight); // Add metadata as text at the bottom (since we can't modify EXIF in JS directly) // This is optional and can be removed if you don't want visible metadata ctx.filter = 'none'; ctx.fillStyle = 'rgba(255, 255, 255, 0.7)'; ctx.fillRect(0, newHeight - 20, newWidth, 20); ctx.fillStyle = 'black'; ctx.font = '12px Arial'; ctx.fillText(`Sports News - ${new Date().toISOString().split('T')[0]}`, 10, newHeight - 6); // Convert canvas to blob canvas.toBlob(blob => { resolve(blob); }, 'image/jpeg', 0.92); // 0.92 quality } catch (error) { reject(error); } }; img.onerror = () => reject(new Error('Failed to load image')); img.src = URL.createObjectURL(imageBlob); }); } // Function to upload image to WordPress async function uploadToWordPress(imageBlob, title, wpApiUrl, wpUsername, wpApplicationPassword) { try { // Create filename const domain = "sports-news"; const timestamp = Math.floor(Date.now() / 1000); const filename = `sports_news_${timestamp}_${domain}.jpg`; // Create form data const formData = new FormData(); formData.append('file', imageBlob, filename); formData.append('title', `Sports Image - ${title}`); formData.append('caption', `Image for article: ${title}`); // Create basic auth credentials const credentials = btoa(`${wpUsername}:${wpApplicationPassword}`); // Upload to WordPress const response = await fetch(`${wpApiUrl}/media`, { method: 'POST', headers: { 'Authorization': `Basic ${credentials}` }, body: formData }); if (!response.ok) { throw new Error(`WordPress upload failed: ${response.status}`); } const responseData = await response.json(); console.log('Image uploaded successfully to WordPress'); return { id: responseData.id, url: responseData.source_url, success: true }; } catch (error) { console.error(`WordPress upload error: ${error.message}`); return { success: false, error: error.message }; } } async function processAndUploadArticleImage(featuredImageUrl, postTitle) { // WordPress REST API credentials const wpApiUrl = 'https://7sport.net/en/wp-json/wp/v2'; const wpUsername = 'steven@'; const wpApplicationPassword = '4HuI sf5q MI9k Bdjl 2NgW c74t'; console.log('Starting image processing and upload...'); const result = await getModifyAndUploadImage( featuredImageUrl, postTitle, wpApiUrl, wpUsername, wpApplicationPassword ); if (result && result.success) { console.log(`Image uploaded successfully. ID: ${result.id}, URL: ${result.url}`); return result; } else { console.error('Image processing failed'); return null; } } // Integration with your existing scrapeArticle function function enhancedScrapeArticle(event, url) { event.preventDefault(); console.log("Scrape function called for URL:", url); if (!url) { console.error('No URL found to scrape'); return; } // Get the icon element const iconElement = event.currentTarget.querySelector('i'); const originalIconClass = iconElement.className; // Show loading indicator console.log("Changing icon to loading spinner"); iconElement.className = 'fas fa-spinner loading'; // Call the scrape function console.log("Starting scraping process..."); scrapeArticleContent(url) .then(result => { console.log("Scraping completed successfully"); console.log('%c Article Scraped Successfully ', 'background: #28a745; color: white; padding: 5px; border-radius: 3px;'); console.log('%c Title ', 'background: #007bff; color: white; padding: 3px;', result.meta.title); console.log('%c Description ', 'background: #17a2b8; color: white; padding: 3px;', result.meta.description); console.log('%c Featured Image ', 'background: #fd7e14; color: white; padding: 3px;', result.meta.featuredImage || 'No featured image found'); console.log('%c Content ', 'background: #6c757d; color: white; padding: 3px;', result.content); // Start tweet search, content rewriting, and image processing in parallel const tweetPromise = searchTwitterForRelatedTweets(result.meta.title); const rewritePromise = rewriteWithDeepseek(result.content); const imagePromise = processAndUploadArticleImage(result.meta.featuredImage, result.meta.title); return Promise.all([ tweetPromise, rewritePromise, imagePromise, Promise.resolve(result) ]); }) .then(([tweetIds, rewriteResult, imageResult, originalResult]) => { // Display rewritten content results if (rewriteResult.success) { console.log('%c Content Rewritten Successfully ', 'background: #9c27b0; color: white; padding: 5px; border-radius: 3px;'); console.log('%c Original Content ', 'background: #6c757d; color: white; padding: 3px;', originalResult.content); console.log('%c Rewritten Content ', 'background: #e91e63; color: white; padding: 3px;', rewriteResult.rewritten_content); } else { console.error('Rewriting failed:', rewriteResult.message); } // Display image processing results if (imageResult && imageResult.success) { console.log('%c Alternative Image Uploaded ', 'background: #28a745; color: white; padding: 5px; border-radius: 3px;'); console.log('%c Image URL ', 'background: #fd7e14; color: white; padding: 3px;', imageResult.url); console.log('%c Image ID ', 'background: #fd7e14; color: white; padding: 3px;', imageResult.id); } else { console.log('%c No Alternative Image Found ', 'background: #dc3545; color: white; padding: 5px; border-radius: 3px;'); } // Display tweet results if (tweetIds.length > 0) { console.log('%c Related Tweets Found ', 'background: #00bcd4; color: white; padding: 5px; border-radius: 3px;'); tweetIds.forEach((id, index) => { console.log(`%c Tweet ${index + 1} ID `, 'background: #ff9800; color: white; padding: 3px;', id); console.log(`Tweet URL: https://twitter.com/x/status/${id}`); }); } else { console.log('%c No Related Tweets Found ', 'background: #ff5722; color: white; padding: 5px; border-radius: 3px;'); } // Return a combined result object if needed for further processing return { original: originalResult, rewritten: rewriteResult, tweets: tweetIds, image: imageResult }; }) .catch(error => { console.error('Process error:', error); }) .finally(() => { // Reset the icon console.log("Resetting icon to original state"); iconElement.className = originalIconClass; }); } // Function to search Twitter for related tweets async function searchTwitterForRelatedTweets(title, maxTweets = 3) { console.log(`Searching Twitter for tweets related to: ${title}`); try { // Call the PHP proxy endpoint const response = await fetch(`https://7newsbox.online/twitter_search_proxy.php?title=${encodeURIComponent(title)}&maxTweets=${maxTweets}`); if (!response.ok) { console.error(`Twitter proxy error: ${response.status}`); const errorText = await response.text(); console.error(errorText); return []; } const data = await response.json(); // Check if tweets were found if (!data.tweets || data.tweets.length === 0) { console.log("No tweets found"); return []; } console.log(`Found ${data.tweets.length} relevant tweets`); return data.tweets; } catch (error) { console.error(`Error searching Twitter: ${error.message}`); return []; } } // Function to rewrite content using Deepseek API async function rewriteWithDeepseek(content) { console.log("Starting content rewriting with Deepseek..."); // Your Deepseek API key const apiKey = 'sk-c5f787815b974d00935a86443a9a91b3'; const apiUrl = 'https://api.deepseek.com/v1/chat/completions'; // Prepare the prompt for Deepseek const prompt = `You are an expert content writer and SEO specialist. Rewrite the following content naturally and uniquely while following these key requirements: - Maintain all primary and LSI keywords at optimal density. - Preserve the heading hierarchy (H1-H6). - Keep meta keywords and important phrases. - Use diverse sentence structures. - Write in active voice. - Ensure natural paragraph flow while maintaining the original meaning and key points. - Format the text in **pure HTML (not Markdown)**, ensuring that headings, bold text, paragraphs, and lists are properly structured with appropriate HTML tags. - Do not include any markdown syntax (*, #, **). - The output should be clean and ready to be rendered in a web page. Here's the content to rewrite in pure HTML format: ${content}`; // Prepare the API request payload const payload = { model: 'deepseek-chat', // Replace with actual model name if different messages: [ { role: 'user', content: prompt } ], temperature: 0.7 }; try { console.log("Sending request to Deepseek API..."); const response = await fetch(apiUrl, { method: 'POST', headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json' }, body: JSON.stringify(payload) }); console.log("Response status:", response.status); if (!response.ok) { throw new Error(`API request failed with status ${response.status}`); } const result = await response.json(); console.log("API response received:", result); // Check if we got a valid response if (result.choices && result.choices[0] && result.choices[0].message && result.choices[0].message.content) { const rewrittenContent = result.choices[0].message.content; console.log("Rewriting successful"); return { success: true, rewritten_content: rewrittenContent }; } else { throw new Error('Invalid API response structure'); } } catch (error) { console.error("Error in rewriting content:", error); return { success: false, message: error.message }; } }