// Global State Management
const state = {
  currentFile: null,
  fileType: null,
  pdfDoc: null,
  currentPage: 1,
  totalPages: 0,
  extractedData: [],
  extractedDataWithCoords: [],
  isProcessing: false,
  ocrWorker: null,
  abortController: null,
};

// Initialize PDF.js Worker.
// Guarded so that a missing `pdfjsLib` global (e.g. the library script failed
// to load) does not throw here and prevent the rest of this script - including
// the event listeners and error toasts - from being set up.
if (typeof pdfjsLib !== 'undefined') {
  pdfjsLib.GlobalWorkerOptions.workerSrc =
    'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
}

// DOM Elements (populated by initializeElements once the DOM is ready)
let elements = {};

/**
 * Looks up every DOM node the application touches and caches it in `elements`.
 * Missing nodes are stored as `null`; callers are expected to null-check.
 */
function initializeElements() {
  elements = {
    uploadSection: document.getElementById('upload-section'),
    previewSection: document.getElementById('preview-section'),
    resultsSection: document.getElementById('results-section'),
    previewContainer: document.getElementById('preview-container'),
    pageSelector: document.getElementById('page-selector'),
    fileInfo: document.getElementById('file-info'),
    resultsGrid: document.getElementById('results-grid'),
    resultsStats: document.getElementById('results-stats'),
    processingModal: document.getElementById('processing-modal'),
    processingTitle: document.getElementById('processing-title'),
    processingStatus: document.getElementById('processing-status'),
    progressBar: document.getElementById('progress-bar'),
    progressPercent: document.getElementById('progress-percent'),
    jsonContent: document.getElementById('json-content'),
    jsonPreview: document.getElementById('json-preview'),
    jsonChevron: document.getElementById('json-chevron'),
    toggleJson: document.getElementById('toggle-json'),
    copyAllBtn: document.getElementById('copy-all-btn'),
    toast: document.getElementById('toast'),
    toastMessage: document.getElementById('toast-message'),
  };
}

// Event Listeners Setup.
// Only bootstraps when a DOM is present so the helpers in this file can also
// be loaded in a non-browser environment.
if (typeof document !== 'undefined') {
  document.addEventListener('DOMContentLoaded', () => {
    initializeElements();
    setupEventListeners();
    setupGlobalErrorHandling();

    // Listen for invalid-file event from upload-zone
    document.addEventListener('invalid-file', (e) => {
      showToast(
        e.detail?.message ?? 'ไฟล์ไม่รองรับ กรุณาอัปโหลด PDF, JPG หรือ PNG',
        'error',
      );
    });
  });
}

/**
 * Wires the custom document-level events and the direct control listeners
 * (page selector, JSON toggle, copy-all button) to their handlers.
 */
function setupEventListeners() {
  // Listen for custom events from components
  document.addEventListener('file-selected', handleFileSelected);
  document.addEventListener('reset-app', resetApplication);
  document.addEventListener('analyze-pdf', analyzePDFStructure);
  document.addEventListener('ocr-thai', performOCR);
  document.addEventListener('export-json', () => exportData('json'));
  document.addEventListener('export-csv', () => exportData('csv'));
  document.addEventListener('export-excel', () => exportData('excel'));
  document.addEventListener('export-html', () => exportData('html'));
  document.addEventListener('copy-all', copyAllData);

  // Page selector change
  if (elements.pageSelector) {
    elements.pageSelector.addEventListener('change', (e) => {
      const pageNum = Number.parseInt(e.target.value, 10);
      if (Number.isNaN(pageNum)) return;
      state.currentPage = pageNum;
      renderPDFPage(pageNum);
    });
  }

  // JSON Toggle
  if (elements.toggleJson) {
    elements.toggleJson.addEventListener('click', toggleJsonPreview);
  }

  // Copy All Button
  if (elements.copyAllBtn) {
    elements.copyAllBtn.addEventListener('click', copyAllData);
  }
}

/**
 * Logs uncaught errors and unhandled promise rejections and surfaces them to
 * the user as an error toast.
 */
function setupGlobalErrorHandling() {
  window.addEventListener('error', (e) => {
    console.error('Global error:', e.error);
    showToast(`เกิดข้อผิดพลาด: ${e.error?.message || 'Unknown error'}`, 'error');
  });

  window.addEventListener('unhandledrejection', (e) => {
    console.error('Unhandled promise rejection:', e.reason);
    showToast('เกิดข้อผิดพลาดในการประมวลผล', 'error');
  });
}

// File Handling Functions

/**
 * Drops the currently loaded PDF document (if any) from `state` and asks
 * PDF.js to destroy it so its resources are not kept alive.
 */
function releasePdfDocument() {
  const doc = state.pdfDoc;
  state.pdfDoc = null;
  state.totalPages = 0;
  state.currentPage = 1;
  if (doc && typeof doc.destroy === 'function') {
    Promise.resolve(doc.destroy()).catch((err) => {
      console.error('PDF cleanup error:', err);
    });
  }
}

/**
 * Handles the `file-selected` custom event: validates the file, tears down
 * everything belonging to the previous file, and starts loading the new one.
 *
 * @param {CustomEvent} e - Event whose `detail.file` is the chosen File.
 */
function handleFileSelected(e) {
  const file = e?.detail?.file;
  if (!file) return;

  // Validate file type
  const validTypes = ['application/pdf', 'image/jpeg', 'image/jpg', 'image/png'];
  if (!validTypes.includes(file.type)) {
    showToast('ไฟล์ไม่รองรับ กรุณาอัปโหลด PDF, JPG หรือ PNG', 'error');
    return;
  }

  // Check file size (limit to 50MB)
  const maxSize = 50 * 1024 * 1024;
  if (file.size > maxSize) {
    showToast('ไฟล์ใหญ่เกินไป กรุณาอัปโหลดไฟล์ขนาดไม่เกิน 50MB', 'error');
    return;
  }

  // Clean up previous state
  if (state.ocrWorker) {
    Promise.resolve(state.ocrWorker.terminate()).catch((err) => {
      console.error('OCR Error:', err);
    });
    state.ocrWorker = null;
  }
  if (state.abortController) {
    state.abortController.abort();
  }
  // Without this an image upload would leave the previous PDF in place and
  // "analyze" would silently run against the old document.
  releasePdfDocument();

  state.abortController = new AbortController();
  state.currentFile = file;
  state.extractedData = [];
  // Coordinate rows take priority when rendering/exporting, so stale ones from
  // the previous file must be cleared as well.
  state.extractedDataWithCoords = [];

  if (file.type === 'application/pdf') {
    state.fileType = 'pdf';
    loadPDF(file);
  } else {
    state.fileType = 'image';
    loadImage(file);
  }

  updateUIForFileLoaded();
}

/**
 * Switches the UI from the upload view to the preview view and shows the
 * current file's name and size.
 */
function updateUIForFileLoaded() {
  elements.uploadSection?.classList.add('hidden');
  elements.previewSection?.classList.remove('hidden');
  elements.resultsSection?.classList.add('hidden');

  const sizeMB = (state.currentFile.size / 1024 / 1024).toFixed(2);
  if (elements.fileInfo) {
    elements.fileInfo.textContent = `${state.currentFile.name} (${sizeMB} MB)`;
  }
}

/**
 * Reads a PDF file, opens it with PDF.js, stores it in `state` and renders
 * the first page. Results are discarded if another file was selected while
 * this one was still loading.
 *
 * @param {File} file - The PDF file to load.
 * @returns {Promise<void>}
 */
async function loadPDF(file) {
  try {
    const arrayBuffer = await file.arrayBuffer();
    if (state.currentFile !== file) return;

    const pdf = await pdfjsLib.getDocument({
      data: arrayBuffer,
      disableAutoFetch: true,
      disableStream: true,
    }).promise;

    if (state.currentFile !== file) {
      // A newer selection won the race; do not overwrite its state.
      Promise.resolve(pdf.destroy()).catch((err) => {
        console.error('PDF cleanup error:', err);
      });
      return;
    }

    state.pdfDoc = pdf;
    state.totalPages = pdf.numPages;
    state.currentPage = 1;

    // Setup page selector
    setupPageSelector();

    // Render first page
    await renderPDFPage(1);

    showToast('โหลด PDF สำเร็จ');
  } catch (error) {
    console.error('PDF Load Error:', error);
    if (error?.name !== 'AbortError' && state.currentFile === file) {
      showToast('ไม่สามารถโหลด PDF ได้', 'error');
    }
  }
}

/**
 * Rebuilds the page <select> with one option per page of the loaded PDF and
 * selects page 1.
 */
function setupPageSelector() {
  const selector = elements.pageSelector;
  if (!selector) return;

  selector.innerHTML = '';
  selector.classList.remove('hidden');

  for (let i = 1; i <= state.totalPages; i++) {
    const option = document.createElement('option');
    option.value = i;
    option.textContent = `หน้า ${i} จาก ${state.totalPages}`;
    selector.appendChild(option);
  }

  selector.value = 1;
}

/**
 * Renders one page of the loaded PDF into a canvas (id `pdf-canvas`) and puts
 * it in the preview container. If the document or the selected page changed
 * while rendering, the finished canvas is dropped instead of being displayed.
 *
 * @param {number} pageNum - 1-based page number.
 * @returns {Promise<void>}
 */
async function renderPDFPage(pageNum) {
  const pdfDoc = state.pdfDoc;
  if (!pdfDoc) return;

  try {
    if (elements.previewContainer) {
      // NOTE(review): the original placeholder markup was missing from the
      // source under review; this is a minimal reconstruction (an icon that
      // feather.replace() can upgrade) - confirm against the intended design.
      elements.previewContainer.innerHTML =
        '<div class="flex items-center justify-center p-8 text-slate-400">' +
        '<i data-feather="loader" class="animate-spin"></i></div>';
      feather.replace();
    }

    const page = await pdfDoc.getPage(pageNum);
    const scale = 1.5;
    const viewport = page.getViewport({ scale });

    const canvas = document.createElement('canvas');
    canvas.id = 'pdf-canvas';
    const context = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    await page.render({ canvasContext: context, viewport }).promise;

    const isStale = state.pdfDoc !== pdfDoc || state.currentPage !== pageNum;
    if (!isStale && elements.previewContainer) {
      elements.previewContainer.innerHTML = '';
      elements.previewContainer.appendChild(canvas);
    }

    page.cleanup();
  } catch (error) {
    console.error('Render Error:', error);
    // Errors caused by the document being replaced mid-render are expected.
    const wasSuperseded = state.pdfDoc !== pdfDoc;
    if (error?.name !== 'AbortError' && !wasSuperseded) {
      showToast('ไม่สามารถแสดงหน้าได้', 'error');
    }
  }
}

/**
 * Reads an image file as a data URL and shows it in the preview container as
 * an <img id="preview-image">. The page selector is hidden for images.
 *
 * @param {File} file - The image file to preview.
 */
function loadImage(file) {
  elements.pageSelector?.classList.add('hidden');

  const reader = new FileReader();
  reader.onload = (e) => {
    // Ignore the result if a different file was selected in the meantime.
    if (state.currentFile !== file) return;

    const img = document.createElement('img');
    img.id = 'preview-image';
    img.src = e.target.result;
    img.className = 'max-w-full h-auto rounded-lg shadow-lg';
    img.loading = 'lazy';

    if (elements.previewContainer) {
      elements.previewContainer.innerHTML = '';
      elements.previewContainer.appendChild(img);
    }
  };
  reader.onerror = () => {
    showToast('ไม่สามารถโหลดภาพได้', 'error');
  };
  reader.readAsDataURL(file);
}

// PDF Structure Analysis

/**
 * Groups PDF text items into visual rows: items whose Y coordinate
 * (`transform[5]`) lies within `yTolerance` of a row's running average Y share
 * that row. Rows come back ordered by descending Y, items within a row by
 * ascending X (`transform[4]`). The input array is not mutated.
 *
 * @param {Array<{transform: number[]}>} items - Text items of one page.
 * @param {number} yTolerance - Maximum Y distance for items to share a row.
 * @returns {Array<{y: number, originalY: number, items: Array}>}
 */
function groupTextItemsIntoRows(items, yTolerance) {
  const byYDescending = [...items].sort((a, b) => b.transform[5] - a.transform[5]);
  const rows = [];

  for (const item of byYDescending) {
    const y = item.transform[5];
    const row = rows.find((r) => Math.abs(r.y - y) < yTolerance);
    if (row) {
      row.items.push(item);
      // Keep a running sum so the average Y is O(1) to update.
      row.ySum += y;
      row.y = row.ySum / row.items.length;
    } else {
      rows.push({ y, ySum: y, items: [item], originalY: y });
    }
  }

  rows.sort((a, b) => b.y - a.y);
  for (const row of rows) {
    row.items.sort((a, b) => a.transform[4] - b.transform[4]);
  }
  return rows;
}

/**
 * Distributes one row's text items over the detected columns.
 *
 * @param {{items: Array}} row - Row produced by groupTextItemsIntoRows.
 * @param {number[]} boundaries - Ascending column boundary X positions.
 * @param {number} viewportHeight - Page height, used to flip Y so that the
 *   stored `y` is measured from the top of the page.
 * @returns {{textData: string[], coordData: Array<{text: string, coordinates: {x: number, y: number, width: number, height: number}}>}}
 *   Non-empty cell texts, and the same cells with rounded coordinates.
 */
function buildRowCells(row, boundaries, viewportHeight) {
  const columnCount = boundaries.length + 1;
  const texts = new Array(columnCount).fill('');
  const cells = new Array(columnCount).fill(null);

  for (const item of row.items) {
    const text = (item.str ?? '').trim();
    if (!text) continue;

    const x = item.transform[4];
    const col = findColumnIndex(x, boundaries);
    texts[col] += (texts[col] ? ' ' : '') + text;

    const cell = cells[col];
    if (!cell) {
      cells[col] = {
        text,
        coordinates: {
          x: Math.round(x),
          y: Math.round(viewportHeight - item.transform[5]),
          width: Math.round(item.width),
          height: Math.round(item.height),
        },
      };
    } else {
      // Append text and widen the box so it spans every item in the cell.
      cell.text += ` ${text}`;
      cell.coordinates.width = Math.round(x + item.width - cell.coordinates.x);
    }
  }

  return {
    textData: texts.filter((cellText) => cellText.trim() !== ''),
    coordData: cells.filter((cell) => cell && cell.text.trim() !== ''),
  };
}

/**
 * Extracts the text layer of every page of the loaded PDF, reconstructs a
 * row/column structure from item positions, stores the result in
 * `state.extractedData` / `state.extractedDataWithCoords` and renders it.
 *
 * The PDF document and abort signal are captured up front: `state` may be
 * reset or pointed at another file while this function is awaiting, and the
 * run must then stop without publishing results.
 *
 * @returns {Promise<void>}
 */
async function analyzePDFStructure() {
  const pdfDoc = state.pdfDoc;
  if (!pdfDoc || state.isProcessing) return;

  const Y_TOLERANCE = 10; // pixels
  const X_GAP_THRESHOLD = 50; // pixels for column detection
  const signal = state.abortController?.signal;
  const totalPages = state.totalPages;

  state.isProcessing = true;
  showProcessingModal('กำลังวิเคราะห์โครงสร้าง PDF', 'กำลังอ่านข้อมูลจากเอกสาร...', 0);

  try {
    const allData = [];
    const allDataWithCoords = [];

    for (let pageNum = 1; pageNum <= totalPages; pageNum++) {
      // Check for abort signal
      if (signal?.aborted) break;

      updateProcessingProgress(
        `กำลังวิเคราะห์หน้า ${pageNum}/${totalPages}...`,
        ((pageNum - 1) / totalPages) * 100,
      );

      const page = await pdfDoc.getPage(pageNum);
      const viewport = page.getViewport({ scale: 1.0 });
      const { items } = await page.getTextContent();

      if (items.length > 0) {
        const rows = groupTextItemsIntoRows(items, Y_TOLERANCE);
        const columnBoundaries = detectColumnBoundaries(rows, X_GAP_THRESHOLD);

        for (const row of rows) {
          const { textData, coordData } = buildRowCells(row, columnBoundaries, viewport.height);
          if (textData.length > 0) {
            allData.push(textData);
          }
          if (coordData.length > 0) {
            allDataWithCoords.push({ page: pageNum, cells: coordData });
          }
        }
      }

      page.cleanup();

      // Yield to UI thread every page
      await new Promise((resolve) => setTimeout(resolve, 0));
    }

    if (!signal?.aborted) {
      state.extractedData = allData;
      state.extractedDataWithCoords = allDataWithCoords;
      renderResults();
      showToast(`วิเคราะห์สำเร็จ พบ ${allData.length} แถวข้อมูล`);
    }
  } catch (error) {
    console.error('Analysis Error:', error);
    if (error?.name !== 'AbortError' && !signal?.aborted) {
      showToast(`เกิดข้อผิดพลาดในการวิเคราะห์: ${error?.message ?? String(error)}`, 'error');
    }
  } finally {
    state.isProcessing = false;
    hideProcessingModal();
  }
}

/**
 * Finds column boundaries for a page: every horizontal gap wider than
 * `threshold` between neighbouring items of a row contributes its midpoint,
 * and midpoints closer than 20px to an already accepted one are merged.
 *
 * @param {Array<{items: Array<{transform: number[], width?: number}>}>} rows
 *   Rows whose items are sorted left to right.
 * @param {number} threshold - Minimum gap width that counts as a column gap.
 * @returns {number[]} Boundary X positions in ascending order.
 */
function detectColumnBoundaries(rows, threshold) {
  const CLUSTER_DISTANCE = 20;
  const midpoints = [];

  for (const row of rows) {
    for (let i = 1; i < row.items.length; i++) {
      const previous = row.items[i - 1];
      const prevX = previous.transform[4] + (previous.width || 0);
      const currentX = row.items[i].transform[4];
      if (currentX - prevX > threshold) {
        midpoints.push((prevX + currentX) / 2);
      }
    }
  }

  // Cluster similar boundaries. `some` (not a truthiness test on `find`) so a
  // boundary at exactly 0 is still recognised as already present.
  const clustered = [];
  for (const midpoint of midpoints) {
    const isKnown = clustered.some((c) => Math.abs(c - midpoint) < CLUSTER_DISTANCE);
    if (!isKnown) clustered.push(midpoint);
  }

  return clustered.sort((a, b) => a - b);
}

/**
 * Returns the index of the column that X position `x` falls into.
 *
 * @param {number} x - X position of a text item.
 * @param {number[]} boundaries - Ascending column boundaries.
 * @returns {number} Index of the first boundary greater than `x`, or
 *   `boundaries.length` (the last column) when there is none.
 */
function findColumnIndex(x, boundaries) {
  const index = boundaries.findIndex((boundary) => x < boundary);
  return index === -1 ? boundaries.length : index;
}

// OCR Functions

/**
 * Picks the image to run OCR on: the preview <img> source for image files, or
 * a PNG data URL of the rendered PDF canvas for PDFs.
 *
 * @returns {string|null} Image source, or null when nothing is displayed.
 */
function getOcrImageSource() {
  if (state.fileType === 'image') {
    return document.getElementById('preview-image')?.src || null;
  }
  if (state.fileType === 'pdf') {
    const canvas = document.getElementById('pdf-canvas');
    return canvas ? canvas.toDataURL('image/png') : null;
  }
  return null;
}

/**
 * Runs Thai OCR (Tesseract.js) on the currently previewed image or PDF page,
 * splits the recognised text into rows/cells and renders the result.
 *
 * @returns {Promise<void>}
 */
async function performOCR() {
  if (state.isProcessing) return;

  const imageSource = getOcrImageSource();
  if (!imageSource) {
    showToast('ไม่พบเอกสารสำหรับ OCR', 'error');
    return;
  }

  // Captured so a reset / new file during recognition is still detected.
  const signal = state.abortController?.signal;
  let worker = null;

  state.isProcessing = true;
  showProcessingModal('กำลังตรวจจับข้อความ (OCR)', 'กำลังโหลด Engine...', 0);

  try {
    worker = await Tesseract.createWorker('tha', 1, {
      logger: (m) => {
        if (m.status === 'recognizing text') {
          updateProcessingProgress(
            `กำลังอ่านข้อความภาษาไทย... ${Math.round(m.progress * 100)}%`,
            m.progress * 100,
          );
        } else if (m.status === 'loading language traineddata') {
          updateProcessingProgress('กำลังโหลดข้อมูลภาษาไทย...', 10);
        }
      },
      errorHandler: (err) => console.error('OCR Error:', err),
    });

    state.ocrWorker = worker;

    const result = await worker.recognize(imageSource);
    if (signal?.aborted) return;

    // Parse OCR result into structured data
    const lines = result.data.text.split('\n').filter((line) => line.trim());
    const parsed = lines
      // Split by multiple spaces to detect columns
      .map((line) => line.split(/\s{2,}/).map((cell) => cell.trim()).filter((cell) => cell))
      .filter((row) => row.length > 0);

    state.extractedData = parsed;
    // Coordinate rows from an earlier PDF analysis would otherwise take
    // priority over the OCR result when rendering and exporting.
    state.extractedDataWithCoords = [];
    renderResults();
    showToast(`OCR สำเร็จ พบข้อความ ${lines.length} บรรทัด`);
  } catch (error) {
    console.error('OCR Error:', error);
    if (error?.name !== 'AbortError' && !signal?.aborted) {
      showToast(`เกิดข้อผิดพลาดในการ OCR: ${error?.message ?? String(error)}`, 'error');
    }
  } finally {
    // Only terminate the worker this run owns; if state.ocrWorker no longer
    // points at it, someone else already terminated/replaced it.
    if (worker && state.ocrWorker === worker) {
      try {
        await worker.terminate();
      } catch (terminateError) {
        console.error('OCR Error:', terminateError);
      }
      state.ocrWorker = null;
    }
    state.isProcessing = false;
    hideProcessingModal();
  }
}

// Results Rendering
// Incremented whenever the results view is (re)rendered or reset, so batches
// scheduled by an older renderResults() call can detect they are obsolete.
let renderGeneration = 0;

// Handle of the pending "hide toast" timer, so a new toast can cancel it.
let toastHideTimer = null;

/**
 * Returns the dataset that should be shown/exported: coordinate rows when
 * available, otherwise the plain text rows.
 *
 * @returns {Array} Either `{page, cells}` row objects or arrays of strings.
 */
function getActiveData() {
  return state.extractedDataWithCoords.length
    ? state.extractedDataWithCoords
    : state.extractedData;
}

/**
 * @param {*} row - A result row.
 * @returns {boolean} True for a coordinate row object (`{ cells: [...] }`).
 */
function isCoordRow(row) {
  return row != null && !Array.isArray(row) && Array.isArray(row.cells);
}

/**
 * @param {*} row - A plain row (array) or a coordinate row (`{ cells }`).
 * @returns {Array} The row's cells; an empty array for anything else.
 */
function getRowCells(row) {
  if (Array.isArray(row)) return row;
  return isCoordRow(row) ? row.cells : [];
}

/**
 * @param {*} cell - A string cell or a `{ text, coordinates }` cell.
 * @returns {string} The cell's text; '' for null/undefined or missing text.
 */
function getCellText(cell) {
  if (cell == null) return '';
  if (typeof cell === 'object') return String(cell.text ?? '');
  return String(cell);
}

/**
 * @param {*} row - A plain or coordinate row.
 * @returns {string} The row's cell texts joined with ' | '.
 */
function rowToText(row) {
  return getRowCells(row).map((cell) => getCellText(cell)).join(' | ');
}

/**
 * Converts rows of either shape into a 2D array of strings.
 *
 * @param {Array} data - Plain and/or coordinate rows.
 * @returns {string[][]}
 */
function toTextMatrix(data) {
  return data.map((row) => getRowCells(row).map((cell) => getCellText(cell)));
}

/**
 * Serialises a 2D array as CSV: every field is double-quoted, embedded quotes
 * are doubled, rows are separated by '\n'.
 *
 * @param {Array<Array<*>>} matrix
 * @returns {string}
 */
function buildCsv(matrix) {
  return matrix
    .map((row) => row.map((cell) => `"${String(cell ?? '').replaceAll('"', '""')}"`).join(','))
    .join('\n');
}

/**
 * Builds a standalone HTML document for the given rows. Coordinate rows become
 * table rows with one cell per entry ("text [x,y]"); plain rows become one
 * paragraph each with the cells separated by spaces. All text is escaped.
 *
 * NOTE(review): the original export markup was missing from the source under
 * review; this is a minimal reconstruction that keeps the same text content -
 * confirm structure/styling against the intended design.
 *
 * @param {string} title - Document title (escaped before use).
 * @param {Array} data - Plain and/or coordinate rows.
 * @returns {string} The HTML document.
 */
function buildHtmlDocument(title, data) {
  const safeTitle = escapeHtml(title);
  const asTable = data.some((row) => isCoordRow(row));

  const renderCell = (cell) => {
    const coords = cell && typeof cell === 'object' ? cell.coordinates : null;
    const coordLabel = coords ? ` <small>[${coords.x},${coords.y}]</small>` : '';
    return `<td>${escapeHtml(getCellText(cell))}${coordLabel}</td>`;
  };

  const rowsHtml = data
    .map((row) => {
      const cells = getRowCells(row).filter((cell) => cell != null);
      if (asTable) {
        return `<tr>${cells.map(renderCell).join('')}</tr>`;
      }
      return `<p>${cells.map((cell) => escapeHtml(getCellText(cell))).join(' ')}</p>`;
    })
    .join('\n');

  const body = asTable ? `<table>\n${rowsHtml}\n</table>` : rowsHtml;

  return [
    '<!DOCTYPE html>',
    '<html lang="th">',
    '<head>',
    '<meta charset="UTF-8">',
    `<title>${safeTitle}</title>`,
    '</head>',
    '<body>',
    `<h1>${safeTitle}</h1>`,
    body,
    '</body>',
    '</html>',
  ].join('\n');
}

/**
 * Creates the DOM line for one cell: a "C<n>" column label, the cell text and,
 * for coordinate cells, an "[x,y]" suffix. Built with textContent so no
 * extracted text is ever interpreted as HTML.
 *
 * NOTE(review): the inner markup/classes of this line were missing from the
 * source under review; the span classes are a reconstruction - confirm.
 *
 * @param {*} cell - String cell or `{ text, coordinates }` cell.
 * @param {number} colIndex - 0-based position of the cell in its row.
 * @returns {HTMLDivElement}
 */
function createCellElement(cell, colIndex) {
  const coords = typeof cell === 'object' ? cell.coordinates : null;

  const cellDiv = document.createElement('div');
  cellDiv.className = typeof cell === 'object'
    ? 'text-sm text-slate-700 flex gap-2 items-start'
    : 'text-sm text-slate-700 flex gap-2';

  const label = document.createElement('span');
  label.className = 'text-xs font-mono text-slate-400 shrink-0';
  label.textContent = `C${colIndex + 1}`;
  cellDiv.appendChild(label);

  const text = document.createElement('span');
  text.className = 'flex-1 break-words';
  text.textContent = getCellText(cell);
  cellDiv.appendChild(text);

  if (coords) {
    const coordInfo = document.createElement('span');
    coordInfo.className = 'text-xs font-mono text-slate-400 shrink-0';
    coordInfo.textContent = `[${coords.x},${coords.y}]`;
    cellDiv.appendChild(coordInfo);
  }

  return cellDiv;
}

/**
 * Creates the result card for one row: a header with the row number and a
 * copy button, followed by one line per non-empty cell.
 *
 * @param {*} row - Plain or coordinate row.
 * @param {number} index - 0-based row index.
 * @returns {HTMLDivElement}
 */
function createRowCard(row, index) {
  const card = document.createElement('div');
  card.className = 'data-card bg-white rounded-xl p-4 shadow-sm hover:shadow-md transition-all';

  const header = document.createElement('div');
  header.className = 'flex justify-between items-center mb-3 pb-2 border-b border-slate-100';

  const rowNum = document.createElement('span');
  rowNum.className = 'text-xs font-bold text-blue-600 bg-blue-50 px-2 py-1 rounded';
  rowNum.textContent = `แถวที่ ${index + 1}`;

  const copyBtn = document.createElement('button');
  copyBtn.type = 'button';
  copyBtn.className = 'text-slate-400 hover:text-blue-600 transition-colors';
  // NOTE(review): the button's icon markup was missing from the source under
  // review; a Feather "copy" icon is assumed because feather.replace() runs
  // after rendering - confirm.
  const copyIcon = document.createElement('i');
  copyIcon.setAttribute('data-feather', 'copy');
  copyIcon.className = 'w-4 h-4';
  copyBtn.appendChild(copyIcon);
  copyBtn.addEventListener('click', () => copyRow(row, index));

  header.appendChild(rowNum);
  header.appendChild(copyBtn);

  const content = document.createElement('div');
  content.className = 'space-y-2';
  getRowCells(row).forEach((cell, colIndex) => {
    if (!cell) return;
    content.appendChild(createCellElement(cell, colIndex));
  });

  card.appendChild(header);
  card.appendChild(content);
  return card;
}

/**
 * Renders the active dataset into the results grid, in batches of 50 cards
 * per task so long documents do not block the UI, then updates the stats
 * line and JSON preview and scrolls the results into view.
 */
function renderResults() {
  const dataToShow = getActiveData();
  const { resultsSection, resultsGrid } = elements;
  if (!dataToShow.length || !resultsSection || !resultsGrid) return;

  const BATCH_SIZE = 50;
  const totalRows = dataToShow.length;
  renderGeneration += 1;
  const generation = renderGeneration;

  resultsSection.classList.remove('hidden');
  resultsGrid.innerHTML = '';

  if (elements.resultsStats) {
    elements.resultsStats.textContent = `พบข้อมูล ${totalRows} แถว`;
  }

  // Update JSON preview with coordinate data if available
  if (elements.jsonContent) {
    elements.jsonContent.textContent = JSON.stringify(dataToShow, null, 2);
  }

  const renderBatch = (startIndex) => {
    // A newer render (or a reset) owns the grid now; stop quietly.
    if (generation !== renderGeneration) return;

    const endIndex = Math.min(startIndex + BATCH_SIZE, totalRows);
    const fragment = document.createDocumentFragment();
    for (let i = startIndex; i < endIndex; i++) {
      fragment.appendChild(createRowCard(dataToShow[i], i));
    }
    resultsGrid.appendChild(fragment);

    if (endIndex < totalRows) {
      setTimeout(() => renderBatch(endIndex), 0);
      return;
    }

    feather.replace();

    // Scroll to results
    setTimeout(() => {
      resultsSection.scrollIntoView({ behavior: 'smooth', block: 'start' });
    }, 100);
  };

  renderBatch(0);
}

// Export Functions

/**
 * Exports the active dataset in the requested format.
 *
 * @param {'json'|'csv'|'excel'|'html'} format
 */
function exportData(format) {
  const dataToExport = getActiveData();

  if (!dataToExport.length) {
    showToast('ไม่มีข้อมูลสำหรับ Export', 'error');
    return;
  }

  const timestamp = new Date().toISOString().slice(0, 19).replace(/:/g, '-');
  const filename = `DocuAnalyza-${timestamp}`;

  switch (format) {
    case 'json':
      exportJSON(filename, dataToExport);
      break;
    case 'csv':
      exportCSV(filename, dataToExport);
      break;
    case 'excel':
      exportExcel(filename, dataToExport);
      break;
    case 'html':
      exportHTML(filename, dataToExport);
      break;
    default:
      console.warn('Unknown export format:', format);
  }
}

/**
 * Downloads the data as pretty-printed JSON.
 *
 * @param {string} filename - File name without extension.
 * @param {Array} data - Rows to export.
 */
function exportJSON(filename, data) {
  const dataStr = JSON.stringify(data, null, 2);
  downloadFile(dataStr, `${filename}.json`, 'application/json');
  showToast('ดาวน์โหลด JSON สำเร็จ');
}

/**
 * Downloads the data as CSV (cell text only; coordinates are not included).
 *
 * @param {string} filename - File name without extension.
 * @param {Array} data - Plain or coordinate rows.
 */
function exportCSV(filename, data) {
  // Leading BOM so spreadsheet applications detect UTF-8 and Thai text is not
  // shown as mojibake.
  const csv = `\uFEFF${buildCsv(toTextMatrix(data))}`;
  downloadFile(csv, `${filename}.csv`, 'text/csv');
  showToast('ดาวน์โหลด CSV สำเร็จ');
}

/**
 * Downloads the data as an .xlsx workbook with one "Extracted Data" sheet and
 * column widths fitted to the longest value of each column.
 *
 * @param {string} filename - File name without extension.
 * @param {Array} data - Plain or coordinate rows.
 */
function exportExcel(filename, data) {
  const worksheetData = toTextMatrix(data);

  const ws = XLSX.utils.aoa_to_sheet(worksheetData);
  const wb = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(wb, ws, 'Extracted Data');

  // Auto-width columns. Every row is considered (rows may have different
  // lengths) and no large array is spread into Math.max.
  const widths = [];
  for (const row of worksheetData) {
    row.forEach((cell, colIndex) => {
      widths[colIndex] = Math.max(widths[colIndex] ?? 0, cell.length);
    });
  }
  if (widths.length > 0) {
    ws['!cols'] = Array.from(widths, (width) => ({ wch: (width ?? 0) + 2 }));
  }

  XLSX.writeFile(wb, `${filename}.xlsx`);
  showToast('ดาวน์โหลด Excel สำเร็จ');
}

/**
 * Downloads the data as a standalone HTML document titled with the current
 * file name.
 *
 * @param {string} filename - File name without extension.
 * @param {Array} data - Plain or coordinate rows.
 */
function exportHTML(filename, data) {
  if (!data.length) {
    showToast('ไม่มีข้อมูลสำหรับ Export', 'error');
    return;
  }

  try {
    const htmlContent = buildHtmlDocument(state.currentFile?.name || 'เอกสาร', data);
    downloadFile(htmlContent, `${filename}.html`, 'text/html');
    showToast('ดาวน์โหลด HTML สำเร็จ');
  } catch (error) {
    console.error('HTML Export Error:', error);
    showToast('เกิดข้อผิดพลาดในการ Export', 'error');
  }
}

/**
 * Triggers a browser download of `content` via a temporary object URL.
 *
 * @param {string} content - File contents.
 * @param {string} filename - Suggested file name.
 * @param {string} mimeType - MIME type (a UTF-8 charset is appended).
 */
function downloadFile(content, filename, mimeType) {
  const blob = new Blob([content], { type: `${mimeType};charset=utf-8` });
  const url = URL.createObjectURL(blob);

  const a = document.createElement('a');
  a.href = url;
  a.download = filename;
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);

  // Revoke on a later task: revoking synchronously right after click() can
  // cancel the download in some browsers.
  setTimeout(() => URL.revokeObjectURL(url), 1000);
}

// Utility Functions

/**
 * Writes text to the clipboard and reports the outcome as a toast.
 *
 * @param {string} text - Text to copy.
 * @param {string} successMessage - Toast shown when copying succeeds.
 */
function writeTextToClipboard(text, successMessage) {
  // navigator.clipboard is unavailable in insecure contexts.
  if (!navigator.clipboard?.writeText) {
    showToast('ไม่สามารถคัดลอกได้', 'error');
    return;
  }
  navigator.clipboard
    .writeText(text)
    .then(() => {
      showToast(successMessage);
    })
    .catch(() => {
      showToast('ไม่สามารถคัดลอกได้', 'error');
    });
}

/**
 * Copies the whole active dataset: one line per row, cells joined by ' | '.
 */
function copyAllData() {
  const dataToCopy = getActiveData();
  if (!dataToCopy.length) return;

  const text = dataToCopy.map((row) => rowToText(row)).join('\n');
  writeTextToClipboard(text, 'คัดลอกข้อมูลทั้งหมดแล้ว');
}

/**
 * Copies a single row (plain array or coordinate row) with cells joined by
 * ' | '.
 *
 * @param {*} row - The row to copy.
 * @param {number} index - 0-based row index, used in the confirmation toast.
 */
function copyRow(row, index) {
  writeTextToClipboard(rowToText(row), `คัดลอกแถวที่ ${index + 1} แล้ว`);
}

/**
 * Shows or hides the JSON preview and rotates the chevron to match.
 */
function toggleJsonPreview() {
  if (!elements.jsonPreview || !elements.jsonChevron) return;

  const isNowHidden = elements.jsonPreview.classList.toggle('hidden');
  elements.jsonChevron.style.transform = isNowHidden ? 'rotate(0deg)' : 'rotate(180deg)';
}

/**
 * Aborts running work, releases the OCR worker and PDF document, clears all
 * state and returns the UI to the upload view.
 */
function resetApplication() {
  // Abort any ongoing operations
  if (state.abortController) {
    state.abortController.abort();
  }

  if (state.ocrWorker) {
    Promise.resolve(state.ocrWorker.terminate()).catch((err) => {
      console.error('OCR Error:', err);
    });
    state.ocrWorker = null;
  }

  // Release the PDF document instead of only dropping the reference.
  const pdfDoc = state.pdfDoc;
  if (pdfDoc && typeof pdfDoc.destroy === 'function') {
    Promise.resolve(pdfDoc.destroy()).catch((err) => {
      console.error('PDF cleanup error:', err);
    });
  }

  // Cancel any result batches that are still scheduled.
  renderGeneration += 1;

  state.currentFile = null;
  state.fileType = null;
  state.pdfDoc = null;
  state.currentPage = 1;
  state.totalPages = 0;
  state.extractedData = [];
  state.extractedDataWithCoords = [];
  state.isProcessing = false;
  state.abortController = null;

  if (elements.uploadSection) elements.uploadSection.classList.remove('hidden');
  if (elements.previewSection) elements.previewSection.classList.add('hidden');
  if (elements.resultsSection) elements.resultsSection.classList.add('hidden');
  if (elements.previewContainer) elements.previewContainer.innerHTML = '';
  if (elements.resultsGrid) elements.resultsGrid.innerHTML = '';
  if (elements.pageSelector) {
    elements.pageSelector.innerHTML = '';
    elements.pageSelector.classList.add('hidden');
  }
  if (elements.jsonContent) elements.jsonContent.textContent = '';

  showToast('รีเซ็ตระบบเรียบร้อย');
}

/**
 * Writes a status line and a percentage to the processing modal's widgets.
 *
 * @param {string} status - Status text.
 * @param {number} percent - Progress percentage.
 */
function applyProgress(status, percent) {
  if (elements.processingStatus) elements.processingStatus.textContent = status;
  if (elements.progressBar) elements.progressBar.style.width = `${percent}%`;
  if (elements.progressPercent) elements.progressPercent.textContent = `${Math.round(percent)}%`;
}

/**
 * Opens the processing modal with the given title, status and progress.
 *
 * @param {string} title
 * @param {string} status
 * @param {number} percent
 */
function showProcessingModal(title, status, percent) {
  if (!elements.processingModal) return;

  if (elements.processingTitle) elements.processingTitle.textContent = title;
  applyProgress(status, percent);
  elements.processingModal.classList.remove('hidden');
}

/**
 * Updates the status text and progress of the processing modal.
 *
 * @param {string} status
 * @param {number} percent
 */
function updateProcessingProgress(status, percent) {
  if (!elements.processingModal) return;
  applyProgress(status, percent);
}

/**
 * Hides the processing modal.
 */
function hideProcessingModal() {
  if (elements.processingModal) {
    elements.processingModal.classList.add('hidden');
  }
}

/**
 * Shows a toast for three seconds.
 *
 * @param {string} message - Text to display.
 * @param {'success'|'error'} [type='success'] - Selects icon and colour.
 */
function showToast(message, type = 'success') {
  if (!elements.toast || !elements.toastMessage) return;

  elements.toastMessage.textContent = message;

  const isError = type === 'error';
  // feather.replace() swaps the <i data-feather> placeholder for an <svg>, so
  // after the first toast there is no <i> left. Rebuild a fresh placeholder
  // from whichever element is present, carrying over its non-Feather,
  // non-colour classes.
  const currentIcon = elements.toast.querySelector('i, svg');
  if (currentIcon) {
    const icon = document.createElement('i');
    const carriedClasses = [...currentIcon.classList].filter(
      (cls) =>
        cls !== 'feather' &&
        !cls.startsWith('feather-') &&
        cls !== 'text-green-400' &&
        cls !== 'text-red-400',
    );
    icon.classList.add(...carriedClasses, isError ? 'text-red-400' : 'text-green-400');
    icon.setAttribute('data-feather', isError ? 'alert-circle' : 'check-circle');
    currentIcon.replaceWith(icon);
    feather.replace();
  }

  elements.toast.classList.remove('opacity-0', 'translate-y-10', 'pointer-events-none');

  // Restart the hide timer so an earlier toast's timer cannot hide this one
  // prematurely.
  clearTimeout(toastHideTimer);
  toastHideTimer = setTimeout(() => {
    elements.toast?.classList.add('opacity-0', 'translate-y-10', 'pointer-events-none');
  }, 3000);
}

/**
 * Escapes text for safe inclusion in HTML content or attribute values.
 *
 * @param {*} text - Value to escape; null/undefined become ''.
 * @returns {string}
 */
function escapeHtml(text) {
  return String(text ?? '')
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;')
    .replaceAll("'", '&#39;');
}