]*>([^<]+)<\/h[1-3]>/gi, field: 'title', type: 'string' as const, description: 'Main title or heading' },
497 |
498 | // Description patterns
499 | { regex: /]*>([^<]{50,})<\/p>/gi, field: 'description', type: 'string' as const, description: 'Content description' },
500 |
501 | // Image patterns
502 | { regex: /
]*src="([^"]+)"/gi, field: 'image_url', type: 'string' as const, description: 'Image URL' },
503 | { regex: /\!\[([^\]]*)\]\(([^)]+)\)/g, field: 'image_url', type: 'string' as const, description: 'Image URL from markdown' },
504 |
505 | // Category patterns
506 | { regex: /category[:\s]*([^,\n]+)/gi, field: 'category', type: 'string' as const, description: 'Product or content category' },
507 | { regex: /tags?[:\s]*([^,\n]+)/gi, field: 'tags', type: 'array' as const, description: 'Content tags' },
508 |
509 | // Contact/Business patterns
510 | { regex: /phone[:\s]*([^,\n]+)/gi, field: 'phone', type: 'string' as const, description: 'Phone number' },
511 | { regex: /email[:\s]*([^,\s\n]+@[^,\s\n]+)/gi, field: 'email', type: 'string' as const, description: 'Email address' },
512 | { regex: /address[:\s]*([^,\n]{10,})/gi, field: 'address', type: 'string' as const, description: 'Physical address' },
513 |
514 | // Date patterns
515 | { regex: /\d{1,2}\/\d{1,2}\/\d{4}/g, field: 'date', type: 'string' as const, description: 'Date information' },
516 | { regex: /\d{4}-\d{2}-\d{2}/g, field: 'date', type: 'string' as const, description: 'Date in ISO format' },
517 |
518 | // Rating patterns
519 | { regex: /rating[:\s]*(\d+\.?\d*)/gi, field: 'rating', type: 'number' as const, description: 'Rating score' },
520 | { regex: /(\d+\.?\d*)\s*stars?/gi, field: 'rating', type: 'number' as const, description: 'Star rating' },
521 |
522 | // Stock/Availability
523 | { regex: /in\s+stock/gi, field: 'availability', type: 'string' as const, description: 'Stock availability' },
524 | { regex: /out\s+of\s+stock/gi, field: 'availability', type: 'string' as const, description: 'Stock availability' },
525 |
526 | // Author/Publisher
527 | { regex: /author[:\s]*([^,\n]+)/gi, field: 'author', type: 'string' as const, description: 'Content author' },
528 | { regex: /by\s+([A-Z][a-z]+\s+[A-Z][a-z]+)/g, field: 'author', type: 'string' as const, description: 'Author name' },
529 |
530 | // ID patterns
531 | { regex: /id[:\s]*([A-Za-z0-9-_]+)/gi, field: 'id', type: 'string' as const, description: 'Unique identifier' },
532 | { regex: /sku[:\s]*([A-Za-z0-9-_]+)/gi, field: 'sku', type: 'string' as const, description: 'Product SKU' },
533 | ]
534 |
535 | // Apply pattern matching
536 | patterns.forEach(pattern => {
537 | const matches = content.match(pattern.regex)
538 | if (matches && matches.length > 0) {
539 | schema.properties[pattern.field] = {
540 | type: pattern.type,
541 | description: pattern.description
542 | }
543 | }
544 | })
545 |
546 | // Always include basic fields if not detected
547 | if (!schema.properties.title) {
548 | schema.properties.title = { type: 'string', description: 'Main title or name' }
549 | }
550 |
551 | if (!schema.properties.description) {
552 | schema.properties.description = { type: 'string', description: 'Main content or description' }
553 | }
554 |
555 | // Content-type specific fields
556 | if (content.toLowerCase().includes('product') || schema.properties.price) {
557 | // E-commerce detected
558 | if (!schema.properties.price) schema.properties.price = { type: 'string', description: 'Product price' }
559 | if (!schema.properties.category) schema.properties.category = { type: 'string', description: 'Product category' }
560 | if (!schema.properties.availability) schema.properties.availability = { type: 'string', description: 'Stock status' }
561 | }
562 |
563 | if (content.toLowerCase().includes('article') || content.toLowerCase().includes('blog')) {
564 | // Blog/Article detected
565 | if (!schema.properties.author) schema.properties.author = { type: 'string', description: 'Article author' }
566 | if (!schema.properties.date) schema.properties.date = { type: 'string', description: 'Publication date' }
567 | if (!schema.properties.tags) schema.properties.tags = { type: 'array', description: 'Article tags' }
568 | }
569 |
570 | if (content.toLowerCase().includes('contact') || schema.properties.phone || schema.properties.email) {
571 | // Contact/Business page detected
572 | if (!schema.properties.name) schema.properties.name = { type: 'string', description: 'Business or person name' }
573 | if (!schema.properties.phone) schema.properties.phone = { type: 'string', description: 'Phone number' }
574 | if (!schema.properties.email) schema.properties.email = { type: 'string', description: 'Email address' }
575 | if (!schema.properties.address) schema.properties.address = { type: 'string', description: 'Physical address' }
576 | }
577 |
578 | return schema
579 | }
580 |
581 | // Fallback default schema
/**
 * Builds the generic schema used as a fallback.
 *
 * @returns An object schema exposing `title`, `description` and `url`,
 *          each declared as a string property.
 */
function getDefaultSchema(): Schema {
  // Every default property is a string field; only the description text differs.
  const stringField = (description: string) => ({ type: 'string' as const, description })

  return {
    type: 'object',
    properties: {
      title: stringField('Main title or heading'),
      description: stringField('Content description'),
      url: stringField('Source URL')
    }
  }
}
592 |
593 | // Helper functions to extract data from markdown
/**
 * Picks a title for a markdown document.
 *
 * Preference order: the first `# ` heading, then the first `## ` heading,
 * then the first non-blank line of the document.
 *
 * @param markdown - Raw markdown text.
 * @returns The trimmed title, or `''` when the document has no non-blank line.
 */
function extractTitleFromMarkdown(markdown: string): string {
  // First H1 anywhere in the document.
  const h1Match = markdown.match(/^#\s+(.+)$/m)
  if (h1Match && h1Match[1] !== undefined) return h1Match[1].trim()

  // Otherwise the first H2.
  const h2Match = markdown.match(/^##\s+(.+)$/m)
  if (h2Match && h2Match[1] !== undefined) return h2Match[1].trim()

  // No heading: use the first line that has content. Taking line 0 blindly
  // would yield an empty title for documents that start with blank lines.
  const firstContentLine = markdown.split('\n').find(line => line.trim() !== '')
  return firstContentLine === undefined ? '' : firstContentLine.trim()
}
607 |
/**
 * Finds the first date-looking substring in a markdown document.
 *
 * Patterns are tried in priority order (not by position in the text):
 * ISO `YYYY-MM-DD`, `M/D/YYYY`, `Month D, YYYY`, `D Mon YYYY`, `Mon D, YYYY`.
 *
 * @param markdown - Raw markdown text.
 * @returns The matched date text exactly as written, or `''` if none is found.
 */
function extractDateFromMarkdown(markdown: string): string {
  const datePatterns = [
    /(\d{4}-\d{2}-\d{2})/,
    /(\d{1,2}\/\d{1,2}\/\d{4})/,
    /(January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}/i,
    /(\d{1,2}\s+(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{4})/i,
    /(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+\d{1,2},?\s+\d{4}/i // e.g. "Oct 21, 2024"
  ]

  for (const pattern of datePatterns) {
    const match = markdown.match(pattern)
    // Return the whole match: in the month-name patterns capture group 1
    // holds only the month ("October"), not the full date.
    if (match) return match[0]
  }

  return ''
}
625 |
/**
 * Extracts a `name: value` style field from markdown and coerces it.
 *
 * The first occurrence of `fieldName` (case-insensitive) followed by a colon
 * and/or whitespace is used; the rest of that line is the raw value.
 *
 * @param markdown - Raw markdown text.
 * @param fieldName - Field label to look for; matched literally.
 * @param fieldType - `'number'`, `'boolean'`, `'array'`, or anything else for a plain string.
 * @returns For a match: a number (or `null` if unparsable), a boolean
 *          (`true` only for "true"/"yes"), a comma-split trimmed string array,
 *          or the trimmed string. Without a match: `[]` for `'array'`, otherwise `''`.
 */
function extractFieldFromMarkdown(markdown: string, fieldName: string, fieldType: string): unknown {
  // Escape regex metacharacters so labels such as "Price (USD)" or "c++"
  // are matched literally instead of changing (or breaking) the pattern.
  const escapedName = fieldName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  const fieldRegex = new RegExp(`${escapedName}[:\\s]+(.+)`, 'i')
  const match = markdown.match(fieldRegex)

  if (!match || match[1] === undefined) {
    return fieldType === 'array' ? [] : ''
  }

  const value = match[1].trim()

  switch (fieldType) {
    case 'number': {
      const num = parseFloat(value)
      return isNaN(num) ? null : num
    }
    case 'boolean': {
      const lowered = value.toLowerCase()
      return lowered === 'true' || lowered === 'yes'
    }
    case 'array':
      return value.split(',').map(s => s.trim())
    default:
      return value
  }
}
649 |
650 |
--------------------------------------------------------------------------------
/src/app/page.tsx:
--------------------------------------------------------------------------------
1 | 'use client'
2 |
3 | import { useState } from 'react'
4 | import Image from 'next/image'
5 | import { Layout, MainContent, Footer } from '@/components/layout/layout'
6 | import { Header } from '@/components/layout/header'
7 | import { Hero } from '@/components/layout/hero'
8 | import { Button } from '@/components/ui/button'
9 | import { Input } from '@/components/ui/input'
10 | import { Select } from '@/components/ui/select'
11 | import { Label } from '@/components/ui/label'
12 | import { ChevronRightIcon, ChevronDownIcon } from 'lucide-react'
13 |
14 | type Step = 1 | 2 | 3 | 4
15 | type Platform = 'wordpress' | 'shopify' | 'webflow' | 'drupal' | 'squarespace' | 'wix' | 'csv' | 'custom' | null
16 |
/** One user-defined field of the extraction schema. */
interface SchemaField {
  /** Field name; an empty string marks a blank row that has not been filled in yet. */
  name: string
  /** Value type requested for the field. */
  type: 'string' | 'number' | 'boolean' | 'array'
  /** Optional human-readable description of the field. */
  description?: string
}
22 |
23 | // TreeView Component
/** A node of the URL tree: a domain root or one path segment below it. */
interface TreeNode {
  /** URLs whose path ends exactly at this node. */
  urls: string[]
  /** Child nodes keyed by the next path segment. */
  children: Record<string, TreeNode>
  /** Number of URLs at this node plus all of its descendants. */
  count: number
  /** Domain key (`protocol//host`) followed by the path segments leading to this node. */
  path: string
}
30 |
31 | // Collapsible JSON viewer component
/** Props accepted by the collapsible JSON viewer. */
interface CollapsibleJSONProps {
  /** Arbitrary value to render. */
  data: unknown
  /** Nesting depth used to size the indentation; the component defaults it to 0. */
  depth?: number
}
36 |
37 | function CollapsibleJSON({ data, depth = 0 }: CollapsibleJSONProps) {
38 | const [collapsed, setCollapsed] = useState>(new Set())
39 |
// Flip the collapsed state of one key, working on a copy so the state update
// receives a new Set instance.
const toggleCollapse = (key: string) => {
  const next = new Set(collapsed)
  // Set.delete reports whether the key was present; if it was not, collapse it.
  if (!next.delete(key)) {
    next.add(key)
  }
  setCollapsed(next)
}
49 |
50 | const renderValue = (value: unknown, key: string, parentKey: string = ''): React.ReactElement => {
51 | const fullKey = parentKey ? `${parentKey}.${key}` : key
52 | const indent = ' '.repeat(depth)
53 |
54 | if (value === null) return null
55 | if (value === undefined) return undefined
56 |
57 | if (typeof value === 'string') {
58 | // Check if string is long (more than 160 chars for better UX)
59 | if (value.length > 160) {
60 | const isCollapsed = collapsed.has(fullKey)
61 | // Show first line or 160 chars when collapsed
62 | const firstLineEnd = value.indexOf('\n')
63 | const truncateAt = firstLineEnd > 0 && firstLineEnd < 160 ? firstLineEnd : 160
64 |
65 | return (
66 |
67 | {isCollapsed ? (
68 | <>
69 | "{value.substring(0, truncateAt)}..."
70 |
76 | >
77 | ) : (
78 |
79 |
"
80 |
81 | {value}
82 |
83 |
"
84 |
90 |
91 | )}
92 |
93 | )
94 | }
95 | return "{value}"
96 | }
97 |
98 | if (typeof value === 'number') return {value}
99 | if (typeof value === 'boolean') return {String(value)}
100 |
101 | if (Array.isArray(value)) {
102 | const isCollapsed = collapsed.has(fullKey)
103 | if (value.length === 0) return []
104 |
105 | return (
106 |
107 |
113 | [
114 | {!isCollapsed && (
115 | <>
116 | {value.map((item, i) => (
117 |
118 | {indent} {renderValue(item, String(i), fullKey)}
119 | {i < value.length - 1 && ,}
120 |
121 | ))}
122 | {indent}
123 | >
124 | )}
125 | {isCollapsed && ...{value.length} items}
126 | ]
127 |
128 | )
129 | }
130 |
131 | if (typeof value === 'object' && value !== null) {
132 | const isCollapsed = collapsed.has(fullKey)
133 | const entries = Object.entries(value)
134 | if (entries.length === 0) return {'{}'}
135 |
136 | return (
137 |
138 |
144 | {'{'}
145 | {!isCollapsed && (
146 | <>
147 | {entries.map(([k, v], i) => (
148 |
149 | {indent} "{k}"
150 | :
151 | {renderValue(v, k, fullKey)}
152 | {i < entries.length - 1 && ,}
153 |
154 | ))}
155 | {indent}
156 | >
157 | )}
158 | {isCollapsed && ...{entries.length} properties}
159 | {'}'}
160 |
161 | )
162 | }
163 |
164 | return {String(value)}
165 | }
166 |
167 | return {renderValue(data, 'root')}
168 | }
169 |
/** Props accepted by the URL tree view. */
interface TreeViewProps {
  /** Root nodes keyed by domain. */
  tree: Record<string, TreeNode>
  /** URLs currently selected. */
  selectedUrls: string[]
  /** Called with the complete new selection whenever it changes. */
  onSelectionChange: (urls: string[]) => void
  /** Requests mapping of the given node path; resolves once mapping has settled. */
  onMapNode: (path: string) => Promise<void>
  /** Paths of nodes that are currently being mapped. */
  mappingNodes: Set<string>
  /** Paths of nodes that are currently expanded. */
  expandedNodes: Set<string>
  /** Called to expand or collapse the node at the given path. */
  onToggleNode: (path: string) => void
}
179 |
180 | function TreeView({ tree, selectedUrls, onSelectionChange, onMapNode, mappingNodes, expandedNodes, onToggleNode }: TreeViewProps) {
181 |
// Expansion state is supplied through props, so simply delegate to the
// onToggleNode callback.
const toggleNode = (path: string): void => void onToggleNode(path)
185 |
// Collect every URL in a subtree: the node's own URLs first, followed by each
// child's URLs depth-first in child insertion order. Always returns a new array.
const getNodeUrls = (node: TreeNode): string[] =>
  Object.values(node.children).reduce<string[]>(
    (collected, child) => collected.concat(getNodeUrls(child)),
    [...node.urls]
  )
193 |
// Classify how much of a subtree is selected:
//   'none'    - no URL under the node is selected (including an empty subtree)
//   'full'    - every URL under the node is selected
//   'partial' - anything in between
const isNodeSelected = (node: TreeNode): 'full' | 'partial' | 'none' => {
  const nodeUrls = getNodeUrls(node)
  // Set lookup keeps this linear; Array.includes inside filter was quadratic.
  const selected = new Set(selectedUrls)
  const selectedCount = nodeUrls.filter(url => selected.has(url)).length

  if (selectedCount === 0) return 'none'
  return selectedCount === nodeUrls.length ? 'full' : 'partial'
}
202 |
// Toggle selection of a whole subtree: a fully selected node is deselected,
// anything else becomes fully selected (duplicates removed).
const toggleNodeSelection = (node: TreeNode) => {
  const nodeUrls = getNodeUrls(node)

  if (isNodeSelected(node) === 'full') {
    // Set lookup keeps removal linear in the size of the current selection.
    const urlsToRemove = new Set(nodeUrls)
    onSelectionChange(selectedUrls.filter(url => !urlsToRemove.has(url)))
  } else {
    onSelectionChange([...new Set([...selectedUrls, ...nodeUrls])])
  }
}
213 |
214 | const renderNode = (name: string, node: TreeNode, level: number = 0) => {
215 | const hasChildren = Object.keys(node.children).length > 0
216 | const isExpanded = expandedNodes.has(node.path)
217 | const selectionState = isNodeSelected(node)
218 | const isMapping = mappingNodes.has(node.path)
219 |
220 | // For root level, show the full domain name
221 | const displayName = level === 0 ? name : `/${name}`
222 |
223 | return (
224 |
225 |
226 | {hasChildren && (
227 |
235 | )}
236 | {!hasChildren &&
}
237 |
238 |
259 |
260 | {/* Map button - show for any path that could be a directory */}
261 | {!isMapping && !node.path.match(/\.(html?|xml|json|txt|pdf|jpg|jpeg|png|gif|svg|css|js)$/i) && (
262 |
272 | )}
273 |
274 |
275 | {hasChildren && isExpanded && (
276 |
277 | {Object.entries(node.children)
278 | .sort(([a], [b]) => a.localeCompare(b))
279 | .map(([childName, childNode]) =>
280 | renderNode(childName, childNode, level + 1)
281 | )}
282 |
283 | )}
284 |
285 | )
286 | }
287 |
288 | return (
289 |
290 | {Object.entries(tree).map(([name, node]) => renderNode(name, node))}
291 |
292 | )
293 | }
294 |
295 | export default function ContentMigratorPage() {
296 | const [currentStep, setCurrentStep] = useState(1)
297 | const [sourceUrl, setSourceUrl] = useState('https://firecrawl.dev')
298 | const [selectedPlatform, setSelectedPlatform] = useState('webflow')
299 | const [exportFormat, setExportFormat] = useState('webflow')
300 | const [showSuccess, setShowSuccess] = useState(false)
301 | const [showUrlInput, setShowUrlInput] = useState(false)
302 | const [showExportOptions, setShowExportOptions] = useState(false)
303 | const [loading, setLoading] = useState(false)
304 | const [loadingText, setLoadingText] = useState('')
305 | const [schemaFields, setSchemaFields] = useState([
306 | { name: 'title', type: 'string' },
307 | { name: 'date', type: 'string' },
308 | { name: '', type: 'string' }
309 | ])
310 |
// Single entry point for changing the active wizard step.
const moveToStep = (step: Step): void => {
  setCurrentStep(step)
}
314 |
315 | const [crawlData, setCrawlData] = useState[]>([])
316 | const [saveRawResults] = useState(true)
317 | const [mapResults, setMapResults] = useState([])
318 | const [selectedUrls, setSelectedUrls] = useState([])
319 | const [isMapping, setIsMapping] = useState(false)
320 | const [mapProgress, setMapProgress] = useState('')
321 | const [mappingNodes, setMappingNodes] = useState>(new Set())
322 | const [isTransitioning, setIsTransitioning] = useState(false)
323 | const [fieldPage, setFieldPage] = useState(0)
324 | const fieldsPerPage = 8
325 | const [viewMode, setViewMode] = useState<'list' | 'tree'>('tree')
326 | const [batchSize, setBatchSize] = useState(50)
327 | const [expandedNodes, setExpandedNodes] = useState>(new Set())
328 |
329 | // Toggle node expansion
// Expand a collapsed node or collapse an expanded one. Uses the functional
// updater so the toggle is applied to the latest expansion set.
const toggleNode = (path: string) => {
  setExpandedNodes(prev => {
    const next = new Set(prev)
    // Set.delete reports whether the path was present; if it was not, expand it.
    if (!next.delete(path)) {
      next.add(path)
    }
    return next
  })
}
341 |
342 | // Map a specific node/path
/**
 * Maps a single tree node.
 *
 * Posts `path` to `/api/map`, keeps the returned URLs whose pathname starts
 * with the mapped pathname (falling back to `path` itself when none do),
 * merges them into the mapped and selected URL lists, and expands the node
 * together with all of its ancestors. Failures are reported with `alert`.
 * The node is flagged in `mappingNodes` for the duration of the call.
 *
 * @param path - Node path (a URL) to map.
 */
const handleMapNode = async (path: string) => {
  const parseUrl = (value: string): URL | null => {
    try {
      return new URL(value)
    } catch {
      return null
    }
  }

  // Flag the node as being mapped until the request settles.
  setMappingNodes(prev => new Set([...prev, path]))

  try {
    const response = await fetch('/api/map', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        url: path,
        limit: 200
      })
    })

    const result = await response.json()

    if (!result.success || !Array.isArray(result.urls)) {
      throw new Error(result.error || 'Failed to map path')
    }

    // Parse the mapped path once instead of once per returned URL.
    // An unparsable path matches nothing, so only the path itself is added.
    const mappedUrl = parseUrl(path)
    const pathUrls: string[] = mappedUrl === null
      ? []
      : result.urls.filter((url: string) => {
          const candidate = parseUrl(url)
          return candidate !== null && candidate.pathname.startsWith(mappedUrl.pathname)
        })

    // If nothing was found under the path, still record the path itself so
    // the node can be explored further later.
    const urlsToAdd = pathUrls.length > 0 ? pathUrls : [path]

    // Merge into the mapped results and auto-select the new URLs (deduplicated).
    setMapResults(prev => [...new Set([...prev, ...urlsToAdd])])
    setSelectedUrls(prev => [...new Set([...prev, ...urlsToAdd])])

    // Expand the mapped node and every ancestor so the new content is visible.
    // Ancestor paths use the same format buildUrlTree gives to node.path:
    // `protocol//host-without-www` followed by the path segments.
    const pathsToExpand: string[] = [path]
    if (mappedUrl !== null) {
      let ancestorPath = `${mappedUrl.protocol}//${mappedUrl.hostname.replace(/^www\./, '')}`
      pathsToExpand.push(ancestorPath)
      for (const segment of mappedUrl.pathname.split('/').filter(Boolean)) {
        ancestorPath += '/' + segment
        pathsToExpand.push(ancestorPath)
      }
    }
    setExpandedNodes(prev => new Set([...prev, ...pathsToExpand]))
  } catch (error) {
    alert(error instanceof Error ? error.message : 'Failed to map path')
  } finally {
    // The node is no longer being mapped, whatever the outcome.
    setMappingNodes(prev => {
      const remaining = new Set(prev)
      remaining.delete(path)
      return remaining
    })
  }
}
422 |
423 | // Build tree structure from URLs
/**
 * Builds a tree of URLs grouped by domain and path segment.
 *
 * Root keys are `protocol//hostname` with a leading `www.` removed, so
 * `www.` and bare hosts share one root. Each path segment becomes a child
 * node; a URL is stored on the node where its path ends (the domain node for
 * the root path). Unparsable URLs are ignored. `count` on every node is the
 * number of URLs at that node plus all descendants.
 *
 * @param urls - Absolute URLs to organize.
 * @returns Domain nodes keyed by domain key, in first-seen order.
 */
const buildUrlTree = (urls: string[]): Record<string, TreeNode> => {
  const tree: Record<string, TreeNode> = {}

  // Children maps have no prototype, so path segments such as "constructor"
  // or "__proto__" are stored as ordinary keys instead of colliding with
  // Object.prototype members (which silently dropped those URLs).
  const createNode = (path: string): TreeNode => ({
    urls: [],
    children: Object.create(null) as Record<string, TreeNode>,
    count: 0,
    path
  })

  for (const url of urls) {
    let parsed: URL
    try {
      parsed = new URL(url)
    } catch {
      // Invalid URL
      continue
    }

    // Normalize the domain (drop "www.") so both host forms are grouped together.
    const domainKey = `${parsed.protocol}//${parsed.hostname.replace(/^www\./, '')}`
    let node = tree[domainKey]
    if (!node) {
      node = createNode(domainKey)
      tree[domainKey] = node
    }

    // Walk (and create as needed) one node per path segment.
    let nodePath = domainKey
    for (const part of parsed.pathname.split('/').filter(Boolean)) {
      nodePath += '/' + part
      let child = node.children[part]
      if (!child) {
        child = createNode(nodePath)
        node.children[part] = child
      }
      node = child
    }

    // The URL belongs to the node where its path ends (the domain node for "/").
    node.urls.push(url)
  }

  // Bottom-up totals: own URLs plus the totals of all children.
  const updateCounts = (node: TreeNode): number => {
    node.count = Object.values(node.children).reduce(
      (total, child) => total + updateCounts(child),
      node.urls.length
    )
    return node.count
  }

  Object.values(tree).forEach(node => updateCounts(node))

  return tree
}
520 |
/**
 * Starts site analysis for `sourceUrl`.
 *
 * Triggers the fade transition, then (after 500 ms) moves to step 2 and maps
 * the site through `/api/map`. Returned URLs are deduplicated by
 * origin + pathname (query strings dropped), stored as the map results with
 * nothing selected, and the root domain nodes are expanded. Failures are
 * reported with `alert`.
 */
const analyzeWebsite = async () => {
  if (!sourceUrl) {
    alert('Please enter a URL')
    return
  }

  // Start fade transition
  setIsTransitioning(true)

  // Wait for fade out, then start mapping
  setTimeout(async () => {
    moveToStep(2)
    setIsTransitioning(false)

    // Automatically start mapping
    setIsMapping(true)
    setMapProgress('Mapping site structure...')

    try {
      const response = await fetch('/api/map', {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          url: sourceUrl,
          limit: 200
        })
      })

      const result = await response.json()

      if (!result.success || !Array.isArray(result.urls)) {
        throw new Error(result.error || 'Failed to map website')
      }

      // Collapse URLs that differ only by query parameters; unparsable
      // entries are kept as-is.
      const uniqueUrls: string[] = Array.from(new Set<string>(
        result.urls.map((url: string) => {
          try {
            const { origin, pathname } = new URL(url)
            return `${origin}${pathname}`
          } catch {
            return url
          }
        })
      ))

      setMapResults(uniqueUrls)
      // Start with no URLs selected
      setSelectedUrls([])

      // Auto-expand root domains
      const rootPaths = Object.values(buildUrlTree(uniqueUrls)).map(node => node.path)
      setExpandedNodes(new Set(rootPaths))
    } catch (error) {
      alert(error instanceof Error ? error.message : 'Failed to map website')
    } finally {
      // Clear the loading state on success and failure alike.
      setIsMapping(false)
      setMapProgress('')
    }
  }, 500)
}
609 |
// Append a blank field; when the new field is the first one on a fresh page,
// jump to that page.
const addSchemaField = () => {
  const previousCount = schemaFields.length
  setSchemaFields([...schemaFields, { name: '', type: 'string' }])

  // previousCount is the index of the new field. It opens a new page when it
  // lands exactly on a page boundary beyond the first page.
  const opensNewPage = previousCount + 1 > fieldsPerPage && previousCount % fieldsPerPage === 0
  if (opensNewPage) {
    setFieldPage(Math.floor(previousCount / fieldsPerPage))
  }
}
618 |
// Remove the field at `index`, keeping one blank field at the end of the
// list so there is always an empty row to fill in.
const removeSchemaField = (index: number) => {
  const remaining = schemaFields.filter((_, position) => position !== index)
  const lastField = remaining[remaining.length - 1]
  const needsBlankRow = lastField === undefined || lastField.name !== ''

  setSchemaFields(needsBlankRow ? [...remaining, { name: '', type: 'string' }] : remaining)
}
627 |
// Merge a partial change into the field at `index`, working on a copy of the
// list so the state update receives a new array.
const updateSchemaField = (index: number, field: Partial<SchemaField>) => {
  const newFields = [...schemaFields]
  newFields[index] = { ...newFields[index], ...field }
  setSchemaFields(newFields)
}
633 |
/**
 * Replaces the schema fields with a predefined template and returns to the
 * first field page. Unknown template names are ignored.
 *
 * @param templateName - One of `shopify`, `wordpress`, `woocommerce`, `blog`, `ecommerce`.
 */
const applyTemplate = (templateName: string) => {
  const text = (name: string): SchemaField => ({ name, type: 'string' })
  const numeric = (name: string): SchemaField => ({ name, type: 'number' })

  // Every template ends with a blank field for custom additions.
  const templates: Record<string, SchemaField[]> = {
    shopify: [
      text('title'),
      text('description'),
      text('price'),
      text('vendor'),
      text('product_type'),
      text('tags'),
      text('image_url'),
      text('sku'),
      numeric('inventory_quantity'),
      text('')
    ],
    wordpress: [
      text('title'),
      text('content'),
      text('author'),
      text('publish_date'),
      text('category'),
      text('tags'),
      text('featured_image'),
      text('')
    ],
    woocommerce: [
      text('title'),
      text('description'),
      text('price'),
      text('regular_price'),
      text('sale_price'),
      text('sku'),
      numeric('stock_quantity'),
      text('category'),
      text('image_url'),
      text('')
    ],
    blog: [
      text('title'),
      text('content'),
      text('date'),
      text('author'),
      text('category'),
      text('tags'),
      text('')
    ],
    ecommerce: [
      text('title'),
      text('description'),
      text('price'),
      text('image_url'),
      text('category'),
      text('availability'),
      text('')
    ]
  }

  const template = templates[templateName]
  if (template) {
    setSchemaFields(template)
    setFieldPage(0) // Reset to first page
  }
}
696 |
// Record the chosen platform and switch the export format to the one that
// platform uses. A null platform leaves the export format untouched.
const selectPlatform = (platform: Platform) => {
  setSelectedPlatform(platform)

  const exportFormatByPlatform = {
    wordpress: 'wordpress',
    shopify: 'shopify',
    webflow: 'webflow',
    drupal: 'drupal',
    csv: 'csv',
    squarespace: 'squarespace',
    wix: 'wix',
    custom: 'json'
  } as const

  if (platform !== null) {
    const format = exportFormatByPlatform[platform]
    if (format !== undefined) {
      setExportFormat(format)
    }
  }
  // Don't auto-advance - let user see the preview and decide
}
728 |
/**
 * Scrapes the selected URLs through `/api/crawl`.
 *
 * Requires a source URL and at least one selected URL. While the request is
 * running a rotating status message is shown; the request is aborted if no
 * response arrives within 3 minutes. On success the crawl data is stored and
 * the wizard moves to step 3; on failure the error is reported with `alert`.
 */
const startCrawl = async () => {
  if (!sourceUrl) {
    alert('Please enter a URL')
    return
  }

  // Check if URLs are selected
  if (selectedUrls.length === 0) {
    alert('Please map the site and select URLs to scrape')
    return
  }

  setLoading(true)
  setLoadingText(`Starting batch scrape of ${selectedUrls.length} pages...`)

  // Update loading text periodically
  const messages = [
    `Extracting content from ${selectedUrls.length} pages...`,
    `Processing website data...`,
    `Applying schema to extracted content...`,
    `Organizing structured data...`
  ]
  const loadingInterval = setInterval(() => {
    const randomMessage = messages[Math.floor(Math.random() * messages.length)]
    setLoadingText(randomMessage)
  }, 3000)

  // Abort the request if no response arrives within 3 minutes.
  const controller = new AbortController()
  const timeoutId = setTimeout(() => controller.abort(), 180000)

  let failureMessage: string | null = null

  try {
    // Send the manual schema when at least one field is named; otherwise ask
    // the API to infer one.
    const hasManualSchema = schemaFields.some(f => f.name)

    const response = await fetch('/api/crawl', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        url: sourceUrl,
        schema: hasManualSchema ? getSchema() : undefined,
        autoInfer: !hasManualSchema,
        limit: selectedUrls.length,
        includeRaw: saveRawResults,
        strategy: 'mapCrawl',
        selectedUrls: selectedUrls
      }),
      signal: controller.signal
    })

    // The response has started; the timeout only guards the wait for it.
    clearTimeout(timeoutId)

    const result = await response.json()

    if (!result.success) {
      throw new Error(result.error || 'Failed to crawl')
    }

    setCrawlData(result.data)
    moveToStep(3)
  } catch (error) {
    if (error instanceof Error && error.name === 'AbortError') {
      failureMessage = 'Crawl is taking longer than expected. Please try with fewer pages or check the URL.'
    } else {
      failureMessage = error instanceof Error ? error.message : 'Failed to crawl website'
    }
  } finally {
    // Always release both timers and the loading state, including when fetch
    // itself rejects (previously the abort timer was left pending).
    clearTimeout(timeoutId)
    clearInterval(loadingInterval)
    setLoading(false)
  }

  // Report after cleanup so the loading state is already cleared.
  if (failureMessage !== null) {
    alert(failureMessage)
  }
}
807 |
/**
 * Exports the crawled data in the current export format.
 *
 * After a 1.5 s delay the data is downloaded as a single file when it fits
 * into one batch, otherwise as batched files. The success state is shown only
 * when the download call completes; failures are reported with `alert`.
 */
const startExport = async () => {
  if (crawlData.length === 0) {
    alert('Please crawl a website first')
    return
  }

  setLoading(true)
  setLoadingText('Formatting data for export...')

  setTimeout(async () => {
    try {
      // Check if we need to create multiple files
      const fileCount = Math.ceil(crawlData.length / batchSize)

      if (fileCount === 1) {
        // Single file export
        const exportData = formatExportData(crawlData, exportFormat)
        downloadFile(exportData, exportFormat)
      } else {
        // Multiple files - create a ZIP
        await downloadBatchedFiles(crawlData, batchSize, exportFormat)
      }

      setShowSuccess(true)
    } catch (error) {
      // Without this, a failed export left the loading state on and produced
      // an unhandled rejection inside the timer callback.
      alert(error instanceof Error ? error.message : 'Failed to export data')
    } finally {
      setLoading(false)
    }
  }, 1500)
}
834 |
835 | const getExportPreview = (data: Record[], format: string) => {
836 | const previewData = data // Show all items in preview
837 |
838 | switch (format) {
839 | case 'json':
840 | case 'webflow':
841 | const jsonData = format === 'json' ? previewData : { items: previewData }
842 |
843 | return (
844 |
845 |
846 |
847 | )
848 | case 'csv':
849 | case 'woocommerce':
850 | case 'shopify':
851 | case 'drupal':
852 | case 'wix':
853 | // For CSV preview, work directly with the data instead of parsing CSV string
854 | const headers = Object.keys(previewData[0] || {})
855 |
856 | return (
857 |
858 |
859 |
860 |
861 | {headers.map((header, i) => (
862 | |
865 | {header}
866 | |
867 | ))}
868 |
869 |
870 |
871 | {previewData.map((item, i) => (
872 |
873 | {headers.map((header, j) => {
874 | const value = item[header]
875 | let displayValue = String(value || '-')
876 |
877 | if (typeof value === 'string') {
878 | // Clean up the content: remove newlines, excessive spaces, and truncate
879 | displayValue = value
880 | .replace(/\r?\n/g, ' ') // Replace newlines with spaces
881 | .replace(/\s+/g, ' ') // Replace multiple spaces with single space
882 | .trim() // Remove leading/trailing spaces
883 |
884 | // Truncate long content
885 | if (displayValue.length > 50) {
886 | displayValue = displayValue.substring(0, 47) + '...'
887 | }
888 | }
889 |
890 | return (
891 | |
892 |
893 | {displayValue}
894 |
895 | |
896 | )
897 | })}
898 |
899 | ))}
900 |
901 |
902 |
903 | )
904 | case 'wordpress':
905 | case 'squarespace':
906 | const xml = generateWordPressXML(previewData)
907 | return (
908 |
909 | {xml}
910 |
911 | )
912 | default:
913 | return (
914 |
915 |
916 |
917 | )
918 | }
919 | }
920 |
/**
 * Serializes crawled items for the given export format.
 *
 * - `shopify` uses the Shopify CSV formatter
 * - `csv`, `woocommerce`, `drupal`, `wix` use generic CSV
 * - `wordpress`, `squarespace` use WordPress XML
 * - `webflow` produces JSON wrapped as `{ items: [...] }`
 * - `json` and any unknown format produce a pretty-printed JSON array
 *
 * @param data - Crawled items.
 * @param format - Export format identifier.
 * @returns The serialized export as a string.
 */
const formatExportData = (data: Record<string, unknown>[], format: string) => {
  if (format === 'shopify') {
    return formatShopifyCSV(data)
  }
  if (format === 'csv' || format === 'woocommerce' || format === 'drupal' || format === 'wix') {
    return convertToCSV(data)
  }
  if (format === 'wordpress' || format === 'squarespace') {
    return generateWordPressXML(data)
  }
  if (format === 'webflow') {
    return JSON.stringify({ items: data }, null, 2)
  }
  // 'json' and anything unrecognized
  return JSON.stringify(data, null, 2)
}
941 |
/**
 * Converts scraped records into a CSV document.
 *
 * Columns are the union of keys across ALL records, in first-seen order, and every
 * row is written against that column list. Records with missing or differently
 * ordered keys therefore stay aligned with the header; a missing key is an empty cell.
 *
 * Cell encoding:
 * - strings are always quoted, with `"` doubled and line breaks replaced by a space;
 * - numbers, booleans and bigints are written bare;
 * - `null` / `undefined` become an empty cell;
 * - arrays of primitives are joined with ", " and quoted;
 * - any other object is JSON-encoded and quoted, so its commas cannot split the row.
 *
 * @param data Records to serialize.
 * @returns CSV text with a header line, or an empty string when `data` is empty.
 */
const convertToCSV = (data: Record<string, unknown>[]) => {
  if (data.length === 0) return ''

  // Quote a text value; any line break (\r\n, \r or \n) is flattened to one space.
  const quote = (text: string): string =>
    `"${text.replace(/\r\n|\r|\n/g, ' ').replace(/"/g, '""')}"`

  const toCell = (value: unknown): string => {
    if (value === null || value === undefined) return ''
    if (typeof value === 'string') return quote(value)
    if (typeof value === 'number' || typeof value === 'boolean' || typeof value === 'bigint') {
      return String(value)
    }
    if (Array.isArray(value) && value.every(entry => entry === null || typeof entry !== 'object')) {
      const parts = value.map(entry => (entry === null || entry === undefined ? '' : String(entry)))
      return quote(parts.join(', '))
    }
    // JSON.stringify yields undefined for functions/symbols; treat that as empty.
    return quote(JSON.stringify(value) || '')
  }

  // Header names are left bare unless they contain a character that would break the row.
  const toHeaderCell = (name: string): string => (/[",\r\n]/.test(name) ? quote(name) : name)

  const columns = [...new Set(data.flatMap(item => Object.keys(item)))]
  const headerLine = columns.map(toHeaderCell).join(',')
  const rows = data.map(item => columns.map(column => toCell(item[column])).join(','))

  return [headerLine, ...rows].join('\n')
}
959 |
/**
 * Builds a Shopify product-import CSV from scraped records.
 *
 * Each record becomes one product row with a single default variant. Scraped fields
 * are mapped onto Shopify columns (`title`, `description`/`content`, `vendor`,
 * `product_type`/`type`, `tags`, `sku`, `weight`, `inventory_quantity`/`stock`,
 * `price`, `compare_at_price`, `barcode`, `image_url`/`image`); everything else gets
 * a fixed default.
 *
 * Handles are slugified titles. Two cases are handled explicitly:
 * - a title with no ASCII letters or digits slugifies to an empty string, so the
 *   handle falls back to `product-<row number>`;
 * - a handle already used by an earlier row gets a numeric suffix (`-2`, `-3`, ...),
 *   so that rows sharing a title are not emitted with the same Handle.
 *
 * @param data Records to export.
 * @returns CSV text with the Shopify header line, or an empty string when `data` is empty.
 */
const formatShopifyCSV = (data: Record<string, unknown>[]) => {
  if (data.length === 0) return ''

  // Shopify required columns in specific order
  const headers = [
    'Handle', 'Title', 'Body (HTML)', 'Vendor', 'Type', 'Tags', 'Published',
    'Option1 Name', 'Option1 Value', 'Variant SKU', 'Variant Grams',
    'Variant Inventory Tracker', 'Variant Inventory Qty', 'Variant Inventory Policy',
    'Variant Fulfillment Service', 'Variant Price', 'Variant Compare At Price',
    'Variant Requires Shipping', 'Variant Taxable', 'Variant Barcode',
    'Image Src', 'Image Position', 'Image Alt Text', 'Gift Card',
    'SEO Title', 'SEO Description', 'Variant Weight Unit', 'Status'
  ]

  // Quote only when needed; line breaks (\r\n, \r or \n) inside a cell become a space.
  const escapeCell = (value: string): string =>
    /[",\r\n]/.test(value)
      ? `"${value.replace(/"/g, '""').replace(/\r\n|\r|\n/g, ' ')}"`
      : value

  const usedHandles = new Set<string>()

  const rows = data.map((item, index) => {
    const title = String(item.title || `Product ${index + 1}`)

    // Slugify the title; fall back to a positional handle when nothing survives.
    const slug = title.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-|-$/g, '')
    const baseHandle = slug || `product-${index + 1}`
    let handle = baseHandle
    for (let suffix = 2; usedHandles.has(handle); suffix++) {
      handle = `${baseHandle}-${suffix}`
    }
    usedHandles.add(handle)

    const body = String(item.description || item.content || '')
    const hasImage = Boolean(item.image_url || item.image)

    // Map scraped data to Shopify fields
    const row: Record<string, string> = {
      'Handle': handle,
      'Title': title,
      'Body (HTML)': body,
      'Vendor': String(item.vendor || 'Default Vendor'),
      'Type': String(item.product_type || item.type || ''),
      'Tags': String(item.tags || ''),
      'Published': 'TRUE',
      'Option1 Name': 'Title',
      'Option1 Value': 'Default Title',
      'Variant SKU': String(item.sku || ''),
      'Variant Grams': String(item.weight || '0'),
      'Variant Inventory Tracker': 'shopify',
      'Variant Inventory Qty': String(item.inventory_quantity || item.stock || '0'),
      'Variant Inventory Policy': 'deny',
      'Variant Fulfillment Service': 'manual',
      'Variant Price': String(item.price || '0'),
      'Variant Compare At Price': String(item.compare_at_price || ''),
      'Variant Requires Shipping': 'TRUE',
      'Variant Taxable': 'TRUE',
      'Variant Barcode': String(item.barcode || ''),
      'Image Src': String(item.image_url || item.image || ''),
      'Image Position': hasImage ? '1' : '',
      'Image Alt Text': hasImage ? title : '',
      'Gift Card': 'FALSE',
      'SEO Title': title.substring(0, 70),
      'SEO Description': body.substring(0, 320),
      'Variant Weight Unit': 'g',
      'Status': 'active'
    }

    // Return values in correct order
    return headers.map(header => escapeCell(row[header] || '')).join(',')
  })

  return [headers.join(','), ...rows].join('\n')
}
1023 |
// generateWordPressXML: builds the WordPress-style export document for the given
// scraped records and returns it as one string.
//
// What the visible code does:
//   - Formats the current time once as "YYYY-MM-DD HH:MM:SS" using the LOCAL clock
//     (getFullYear/getMonth/getDate/getHours/...), and uses that same string as the
//     post date of every item; any date found on a record is not used for this.
//   - For each record, the body is `content`, falling back to `description`, then ''.
//     The title is `title`, falling back to "Post <n>" (1-based).
//   - Each item is numbered `index + 1`, which also builds its
//     https://example.com/?p=<n> links.
//   - Every record key other than title/content/description/date is emitted through
//     the per-entry template; `date`, when present, gets its own conditional section.
//   - The per-item fragments are concatenated and embedded in the outer channel
//     template ("Imported Content", https://example.com, ...).
//
// NOTE(review): `postDateGMT` is assigned the local-time string unchanged, so it is
// not actually GMT unless the machine runs in UTC.
// NOTE(review): the XML element tags inside the template literals are not visible in
// this copy of the file, so it cannot be confirmed from here whether interpolated
// values are wrapped in CDATA or otherwise escaped — verify against the real source.
1024 | const generateWordPressXML = (data: Record[]) => {
1025 | // Format current date in WordPress format (YYYY-MM-DD HH:MM:SS)
1026 | const now = new Date()
1027 | const formatWPDate = (date: Date) => {
1028 | const year = date.getFullYear()
1029 | const month = String(date.getMonth() + 1).padStart(2, '0')
1030 | const day = String(date.getDate()).padStart(2, '0')
1031 | const hours = String(date.getHours()).padStart(2, '0')
1032 | const minutes = String(date.getMinutes()).padStart(2, '0')
1033 | const seconds = String(date.getSeconds()).padStart(2, '0')
1034 | return `${year}-${month}-${day} ${hours}:${minutes}:${seconds}`
1035 | }
1036 |
1037 | const currentDate = formatWPDate(now)
1038 |
1039 | const items = data.map((item, index) => {
1040 | // Extract content, fallback to description if content not available
1041 | const content = item.content || item.description || ''
1042 | const title = item.title || `Post ${index + 1}`
1043 |
1044 | // Use current date/time for all posts since the scraped dates are in various formats
1045 | const postDate = currentDate
1046 | const postDateGMT = currentDate // For simplicity, using same as local time
1047 |
1048 | return `
1049 | -
1050 |
1051 | https://example.com/?p=${index + 1}
1052 | ${now.toUTCString()}
1053 |
1054 | https://example.com/?p=${index + 1}
1055 |
1056 |
1057 |
1058 | ${index + 1}
1059 |
1060 |
1061 |
1062 |
1063 |
1064 |
1065 |
1066 |
1067 | 0
1068 | 0
1069 |
1070 |
1071 | 0
1072 | ${Object.entries(item).filter(([key]) => !['title', 'content', 'description', 'date'].includes(key)).map(([key, value]) => `
1073 |
1074 |
1075 |
1076 | `).join('')}
1077 | ${item.date ? `
1078 |
1079 |
1080 |
1081 | ` : ''}
1082 |
`
1083 | }).join('')
1084 |
1085 | return `
1086 |
1087 |
1088 |
1089 |
1090 |
1091 |
1092 |
1099 |
1100 | Imported Content
1101 | https://example.com
1102 | Content imported from Firecrawl
1103 | ${new Date().toUTCString()}
1104 | en-US
1105 | 1.2
1106 | https://example.com
1107 | https://example.com
1108 |
1109 |
1110 | 1
1111 |
1112 |
1113 |
1114 |
1115 |
1116 |
1117 |
1118 | https://firecrawl.dev/?v=1.0
1119 | ${items}
1120 |
1121 | `
1122 | }
1123 |
/**
 * Triggers a browser download of `content` as a single file.
 *
 * The file extension is looked up from the export target id, and the Blob is given
 * the matching MIME type (JSON, CSV or XML) instead of a blanket `text/plain`.
 * Unknown targets are saved as `.txt` / `text/plain`.
 *
 * @param content Serialized export payload.
 * @param format Export target id (e.g. 'json', 'shopify', 'wordpress').
 * @param filename Optional explicit file name; when omitted, `export-<timestamp>.<ext>` is used.
 */
const downloadFile = (content: string, format: string, filename?: string) => {
  const extensions: Record<string, string> = {
    json: 'json',
    // The platform picker lists the plain-JSON option under the id 'custom'.
    // NOTE(review): confirm whether that id reaches this function unmapped.
    custom: 'json',
    csv: 'csv',
    wordpress: 'xml',
    woocommerce: 'csv',
    shopify: 'csv',
    webflow: 'json',
    drupal: 'csv',
    bigcommerce: 'csv',
    squarespace: 'xml',
    wix: 'csv'
  }

  const mimeTypes: Record<string, string> = {
    json: 'application/json',
    csv: 'text/csv',
    xml: 'application/xml',
    txt: 'text/plain'
  }

  const ext = extensions[format] || 'txt'
  const blob = new Blob([content], { type: `${mimeTypes[ext] || 'text/plain'};charset=utf-8` })
  const url = URL.createObjectURL(blob)

  const a = document.createElement('a')
  a.href = url
  a.download = filename || `export-${Date.now()}.${ext}`
  document.body.appendChild(a)
  a.click()
  document.body.removeChild(a)

  // Revoke on a later tick: revoking synchronously after click() can cancel the
  // download in browsers that start reading the blob URL asynchronously.
  setTimeout(() => URL.revokeObjectURL(url), 1000)
}
1149 |
/**
 * Splits `data` into batches of `size` records, serializes each batch with
 * `formatExportData`, packs the results into a ZIP (one `export-part<N>.<ext>` entry
 * per batch) and triggers a browser download of `export-<timestamp>.zip`.
 *
 * `size` is normalized to a whole number of at least 1. A value of 0 would otherwise
 * keep the batching loop from ever advancing, and a fractional value would produce
 * unevenly sized batches.
 *
 * @param data Records to export.
 * @param size Maximum number of records per file.
 * @param format Export target id; selects both the serializer and the file extension.
 */
const downloadBatchedFiles = async (data: Record<string, unknown>[], size: number, format: string) => {
  const JSZip = (await import('jszip')).default
  const zip = new JSZip()

  const batchLength = Number.isFinite(size) ? Math.max(1, Math.floor(size)) : 1

  const batches: Record<string, unknown>[][] = []
  for (let start = 0; start < data.length; start += batchLength) {
    batches.push(data.slice(start, start + batchLength))
  }

  // Kept in step with the extension table used for single-file downloads.
  const extensions: Record<string, string> = {
    json: 'json',
    custom: 'json',
    csv: 'csv',
    wordpress: 'xml',
    woocommerce: 'csv',
    shopify: 'csv',
    webflow: 'json',
    drupal: 'csv',
    bigcommerce: 'csv',
    squarespace: 'xml',
    wix: 'csv'
  }

  const timestamp = Date.now()
  const ext = extensions[format] || 'txt'

  // Add each batch to the ZIP file
  batches.forEach((batch, i) => {
    zip.file(`export-part${i + 1}.${ext}`, formatExportData(batch, format))
  })

  // Generate and download the ZIP file
  const zipContent = await zip.generateAsync({ type: 'blob' })
  const zipUrl = URL.createObjectURL(zipContent)
  const a = document.createElement('a')
  a.href = zipUrl
  a.download = `export-${timestamp}.zip`
  document.body.appendChild(a)
  a.click()
  document.body.removeChild(a)

  // Revoke on a later tick so the browser has started the download before the URL dies.
  setTimeout(() => URL.revokeObjectURL(zipUrl), 1000)
}
1194 |
1195 |
/**
 * Builds the object schema describing the currently configured fields.
 *
 * Fields with an empty name are skipped. In the remaining names every run of
 * whitespace is replaced by a single underscore, and each resulting key maps to
 * `{ type: <field type> }`. A later field whose normalized name matches an earlier
 * one overwrites it.
 *
 * @returns `{ type: 'object', properties }` for the current `schemaFields`.
 */
const getSchema = () => {
  const properties: Record<string, { type: string }> = {}

  for (const { name, type } of schemaFields) {
    if (!name) continue
    // Spaces become underscores so the key is safe to use as a JSON property name.
    const key = name.replace(/\s+/g, '_')
    properties[key] = { type }
  }

  return { type: 'object', properties }
}
1210 |
1211 | return (
1212 |
1213 |
1214 |
1215 |
1219 |
1220 | {/* Animated Transformation Visualization */}
1221 | = 2 || isTransitioning ? 'opacity-0' : 'opacity-100'} ${currentStep >= 2 ? 'hidden' : ''}`}>
1222 |
1223 |
1224 |
1225 | {/* Crawling Stage */}
1226 |
1227 |
Step 1: Map & Select Pages
1228 |
1229 |
1230 |
1231 |
1232 |
Mapping site structure...
1233 |
► https://firecrawl.dev
1234 |
├─ /blog
1235 |
├─ /2025/openai-launches-gpt5
1236 |
├─ /2025/apple-vision-pro-2
1237 |
├─ /2025/tesla-robotaxi-fleet
1238 |
├─ /docs
1239 |
├─ /pricing
1240 |
└─ /api
1241 |
5 pages selected
1242 |
1243 |
1244 |
1245 |
1246 |
Selected Pages: 5
1247 |
1248 |
1249 |
Mapping
1250 |
1251 |
1252 |
1253 |
1254 |
1255 | {/* Extract Stage */}
1256 |
1257 |
Step 2: Batch Scrape & Extract
1258 |
1259 |
1260 |
1261 |
1262 |
1263 |
1264 |
title: "OpenAI Launches GPT-5"
1265 |
date: "2025-01-22"
1266 |
content: "Major breakthrough in..."
1267 |
1268 |
1269 |
1270 |
1271 |
title: "Apple Vision Pro 2 Revealed"
1272 |
date: "2025-01-21"
1273 |
content: "Revolutionary AR headset..."
1274 |
1275 |
1276 |
1277 |
1278 |
title: "Tesla Robotaxi Fleet Live"
1279 |
date: "2025-01-20"
1280 |
content: "Autonomous vehicles hit..."
1281 |
1282 |
1283 |
1284 |
1285 |
title: "Meta AI Assistant Update"
1286 |
date: "2025-01-19"
1287 |
content: "New features include..."
1288 |
1289 |
1290 |
1291 |
1292 |
1293 |
1294 |
1295 |
Extracted: 5 pages
1296 |
1297 |
1298 |
Batch Processing
1299 |
1300 |
1301 |
1302 |
1303 |
1304 |
1305 |
Step 3: Export
1306 |
1307 | Exported: 127
1308 | • JSON • XML • CSV
1309 |
1310 |
1311 |
1312 |
1313 |
1314 |
1315 | {/* JSON Structure */}
1316 |
1317 |
1318 |
{'[{'}
1319 |
"title": "OpenAI Launches GPT-5",
1320 |
"date": "2025-01-22",
1321 |
"content": "Major breakthrough in...",
1322 |
"author": "Sarah Chen"
1323 |
{'}'}{']'}
1324 |
1325 |
1326 |
1327 | {/* WordPress XML */}
1328 |
1329 |
1330 |
<rss version="2.0">
1331 |
<channel>
1332 |
<item>
1333 |
<title>OpenAI Launches GPT-5</title>
1334 |
<wp:post_date>2025-01-22</wp:post_date>
1335 |
<content:encoded>Major breakthrough...</content:encoded>
1336 |
</item>
1337 |
1338 |
1339 |
1340 | {/* Shopify CSV */}
1341 |
1342 |
1343 |
Title,Date,Content,Tags,Author
1344 |
"OpenAI Launches GPT-5","2025-01-22","Major breakthrough...","AI|Technology","Sarah Chen"
1345 |
"Apple Vision Pro 2","2025-01-21","Revolutionary AR...","AR|Apple","John Doe"
1346 |
1347 |
1348 |
1349 | {/* Webflow JSON */}
1350 |
1351 |
1352 |
{'{'}
1353 |
"items": {'[{'}
1354 |
"name": "OpenAI Launches GPT-5",
1355 |
"slug": "openai-launches-gpt-5",
1356 |
"date": "2025-01-22",
1357 |
"content": "Major breakthrough..."
1358 |
{'}'}{']'}
1359 |
1360 |
1361 |
1362 |
1363 |
1364 |
1365 |
1366 |
1367 |
1368 |
1369 |
1370 |
1371 |
1372 | {/* Main Panel - Configuration */}
1373 |
1374 | {/* Step 1: Analyze Website */}
1375 | {currentStep === 1 && (
1376 |
1377 |
1378 |
1379 | {!showUrlInput ? (
1380 |
1387 | ) : (
1388 |
1389 |
1390 |
setSourceUrl(e.target.value)}
1395 | placeholder="https://example.com/blog"
1396 | className="flex-1 h-12 text-base px-6 pr-16 bg-white rounded-full border-0 focus:ring-2 focus:ring-orange-500"
1397 | onKeyPress={(e) => e.key === 'Enter' && analyzeWebsite()}
1398 | autoFocus
1399 | />
1400 |
1408 |
1409 |
1410 | )}
1411 |
1412 |
1413 |
1414 | )}
1415 |
1416 | {/* Step 2: Customize Schema */}
1417 | {currentStep === 2 && loading && (
1418 |
1419 |
1420 |
1421 | {/* Skeleton for Select Target Platform */}
1422 |
1423 |
Select Target Platform
1424 |
1425 | {[...Array(8)].map((_, i) => (
1426 |
1427 |
1428 |
1429 |
.---
1430 |
1431 |
1432 | ))}
1433 |
1434 |
1435 |
1439 |
1440 |
1441 | {/* Skeleton for Export Preview */}
1442 |
1443 |
1444 |
Export Preview
1445 | Items:
1446 |
1447 |
1448 |
1449 |
1450 | {/* Loading text overlay */}
1451 |
1452 |
1453 | {loadingText}
1454 |
1455 |
1456 |
1457 | {/* Skeleton lines */}
1458 |
1459 |
1460 |
1461 |
1462 |
1463 |
1464 |
1465 |
1466 |
1467 |
1468 |
1469 |
1470 |
1471 |
1472 |
1473 |
1474 |
1475 |
1476 |
1477 |
1478 | )}
1479 |
1480 | {/* Step 2: Customize Schema */}
1481 | {currentStep === 2 && !loading && (
1482 |
1483 |
1484 | {/* Left Column - Data Fields */}
1485 |
1486 |
1487 |
1488 |
Data Fields
1489 |
1490 |
1507 |
1508 |
1509 |
{schemaFields.length} fields
1510 |
1511 |
1512 |
1513 |
1590 |
1591 | {schemaFields.length > fieldsPerPage && (
1592 |
1593 |
1594 | Showing {fieldPage * fieldsPerPage + 1}-{Math.min((fieldPage + 1) * fieldsPerPage, schemaFields.length)} of {schemaFields.length}
1595 |
1596 |
1597 |
1606 |
1615 |
1616 |
1617 | )}
1618 |
1619 |
1620 |
1621 | {/* Right Column - Page Selection */}
1622 |
1623 |
1624 |
Page Selection
1625 | {isMapping && (
1626 |
1627 |
1628 |
{mapProgress || 'Mapping site structure...'}
1629 |
1630 | )}
1631 |
1632 |
1633 | {/* Map Results Table */}
1634 | {mapResults.length > 0 && (
1635 |
1636 |
1637 |
Select Pages
1638 |
1639 |
Found {mapResults.length} pages
1640 |
1641 |
1651 |
1661 |
1662 |
1663 |
1664 |
1665 |
1666 |
1673 |
1680 |
1681 |
1682 |
1683 |
1684 | {viewMode === 'list' ? (
1685 | // List View
1686 | mapResults.map((url, index) => (
1687 |
1702 | ))
1703 | ) : (
1704 | // Tree View
1705 |
1714 | )}
1715 |
1716 |
1717 |
1718 |
1719 |
1720 | {selectedUrls.length} pages selected
1721 |
1722 |
1723 |
1724 | )}
1725 |
1726 | {/* Show loading state while mapping */}
1727 | {isMapping && (
1728 |
1729 | {/* Tree skeleton loader */}
1730 |
1731 |
1732 | {/* Root domain skeleton */}
1733 |
1734 |
1735 |
1736 |
1737 |
1738 |
1739 |
1740 | {/* Child items skeleton */}
1741 |
1742 |
1743 |
1744 |
1745 |
1746 |
1747 |
1748 |
1749 | {/* Nested items skeleton */}
1750 |
1751 |
1752 |
1753 |
1754 |
1755 |
1756 |
1757 |
1758 |
1759 |
1760 |
1761 |
1762 |
1763 |
1764 |
1765 |
1766 |
1767 |
1768 |
1769 |
1770 |
1771 |
1772 |
1773 |
1774 |
1775 |
1776 |
1777 |
1778 |
1779 |
1780 |
1781 |
1782 |
1783 |
1784 |
1785 |
1786 |
1787 |
1788 |
1789 |
1790 |
1791 |
1792 | )}
1793 |
1794 | {/* Show message when no pages yet */}
1795 | {!isMapping && mapResults.length === 0 && (
1796 |
1797 |
Mapping will begin automatically...
1798 |
1799 | )}
1800 |
1801 |
1802 |
1803 |
1804 |
1811 |
1822 |
1823 |
1824 | )}
1825 |
1826 | {/* Step 3: Select Platform */}
1827 | {currentStep === 3 && (
1828 |
1829 |
1830 |
Select Target Platform
1831 |
1832 | {[
1833 | { id: 'webflow', name: 'Webflow', icon: '/svg/webflow.svg', fileType: '.json' },
1834 | { id: 'wordpress', name: 'WordPress', icon: '/svg/wordpress.svg', fileType: '.xml' },
1835 | { id: 'shopify', name: 'Shopify', icon: '/svg/shopify.svg', fileType: '.csv' },
1836 | { id: 'drupal', name: 'Drupal', icon: '/drupal.png', fileType: '.csv' },
1837 | { id: 'squarespace', name: 'Squarespace', icon: '/squarespace.jpg', fileType: '.xml' },
1838 | { id: 'wix', name: 'Wix', icon: '/wix.png', fileType: '.csv' },
1839 | { id: 'csv', name: 'CSV', icon: '/csv-file-icon.svg', fileType: '.csv' },
1840 | { id: 'custom', name: 'JSON', icon: '/json-file-icon.svg', fileType: '.json' },
1841 | ].map((platform) => (
1842 |
1883 | ))}
1884 |
1885 |
1886 |
1887 |
1910 |
1919 |
1920 |
1921 |
1922 | {/* Right: Preview or Export Options */}
1923 |
1924 | {!showExportOptions ? (
1925 | <>
1926 |
1927 |
Export Preview
1928 | Items: {crawlData.length}
1929 |
1930 |
1931 | {crawlData.length > 0 ? (
1932 |
1933 |
1934 |
1935 | {getExportPreview(crawlData, exportFormat)}
1936 |
1937 |
1938 |
1939 | ) : (
1940 |
1941 |
1942 |
CSV
1943 |
No data scraped yet
1944 |
Complete the scraping process to see a preview
1945 |
1946 |
1947 | )}
1948 | >
1949 | ) : (
1950 |
1951 |
1952 |
Export Configuration
1953 |
1960 |
1961 |
1962 |
1963 |
1964 |
1965 |
1966 |
1982 |
1983 |
1984 |
1985 |
1986 |
1987 | setBatchSize(Math.max(1, parseInt(e.target.value) || 50))}
1992 | placeholder="Items per file"
1993 | min="1"
1994 | className="flex-1"
1995 | />
1996 |
1997 | = {Math.ceil(crawlData.length / batchSize)} file{Math.ceil(crawlData.length / batchSize) !== 1 ? 's' : ''}
1998 |
1999 |
2000 |
2001 | Will create a ZIP with multiple files if needed
2002 |
2003 |
2004 |
2005 |
2006 |
2013 |
2014 |
2015 | {showSuccess && (
2016 |
2017 |
Export Complete!
2018 |
Successfully extracted {crawlData.length} items
2019 |
Your download should start automatically
2020 |
2021 | )}
2022 |
2023 |
2024 |
2025 | )}
2026 |
2027 |
2028 | )}
2029 |
2030 |
2031 |
2032 |
2033 |
2034 |
2035 |
2036 |
2037 |
2038 |
2039 |
2040 | )
2041 | }
--------------------------------------------------------------------------------