<# 
takeout_batch_year_ops.ps1
Operational (stable) version for Google Takeout Photos ZIP batches

Features
- Processes ALL *.zip in ZipDir in batches (default 4)
- Creates per-batch work folder, scans ONLY that folder, then deletes it
- Moves processed ZIPs to _processed (or deletes them with -DeleteZips)
- Year classification priority: Takeout JSON (photoTakenTime/creationTime) -> EXIF(JPG/JPEG) -> LastWriteTime
- JSON folder cache (per-run) for performance
- Hash de-dup with buffered writes + retry
- Skips bad files without stopping; logs to _bad_files.txt
- Disk-space guard (MinFreeGB); auto-reduces batch 8->6->4 if needed

Usage:
  Set-ExecutionPolicy -Scope Process -ExecutionPolicy Bypass
  .\takeout_batch_year_ops.ps1 -ZipDir "C:\...\Takeout_Zip" -DestRoot "D:\...\From_Google_Takeout" -BatchSize 4 -MinFreeGB 25
#>

param(
  # Folder holding the Takeout *.zip files. The script also creates its
  # "_processed" and "_work" subfolders inside this folder.
  [Parameter(Mandatory=$true)][string]$ZipDir,

  # Output root: receives one subfolder per year plus the hash DB and the
  # bad-file log.
  [Parameter(Mandatory=$true)][string]$DestRoot,

  # Number of ZIPs extracted and processed per batch. Must be at least 1:
  # the batch loop advances by this amount, so 0 or a negative value would
  # never reach the end of the ZIP list.
  [ValidateRange(1, 2147483647)][int]$BatchSize = 4,

  # Minimum free space (GB) required on the ZipDir drive before a batch starts.
  [int]$MinFreeGB = 25,

  # Delete each processed ZIP instead of moving it to "_processed".
  [switch]$DeleteZips,

  # Keep the per-batch work folder instead of removing it after the batch.
  [switch]$KeepWork,

  # Also log skipped duplicates and move->copy fallbacks.
  [switch]$VerboseLog
)

# Lower-case extensions (leading dot included) accepted by the media scan.
$MediaExt  = @(".jpg", ".jpeg", ".png", ".gif", ".webp", ".heic")  # still images
$MediaExt += @(".mp4", ".mov", ".m4v", ".avi", ".mkv")             # video
$MediaExt += @(".dng", ".cr2", ".nef", ".arw")                     # camera RAW

# Non-terminating errors are reported but do not stop the run.
$ErrorActionPreference = "Continue"

# ---- globals / caches ----
# Per-run cache: directory path -> hashtable(file name key -> year string).
$JsonYearCache = @{}
# Hashes accepted since the last flush to the hash DB file.
$HashBuffer = New-Object -TypeName 'System.Collections.Generic.List[string]'

# Writes $msg to the host, prefixed with a "[yyyy-MM-dd HH:mm:ss]" timestamp.
function Log($msg){
  $stamp = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
  Write-Host "[$stamp] $msg"
}

# Creates directory $p if it does not exist yet.
# The existence check uses -LiteralPath so that names containing wildcard
# characters such as "[" and "]" are matched literally; otherwise an existing
# directory can be reported as missing and New-Item then fails.
function EnsureDir($p){
  if(-not (Test-Path -LiteralPath $p -PathType Container)){
    # -Force keeps the call quiet if the directory appears between the check
    # and the creation.
    New-Item -ItemType Directory -Path $p -Force | Out-Null
  }
}

# Returns the free space, in GB rounded to two decimals, of the PowerShell
# drive that contains $anyPath.
function FreeGB($anyPath){
  $item  = Get-Item $anyPath
  $drive = Get-PSDrive -Name $item.PSDrive.Name
  return [Math]::Round($drive.Free / 1GB, 2)
}

# Appends $text to the file at $path, making up to $maxTry attempts.
# After each failed attempt it sleeps 150 ms times the attempt number.
# Returns $true once an append succeeds, $false when every attempt failed.
function AppendTextWithRetry([string]$path, [string]$text, [int]$maxTry=8){
  $attempt = 0
  while($attempt -lt $maxTry){
    $attempt++
    try{
      Add-Content -LiteralPath $path -Value $text -ErrorAction Stop
    } catch {
      Start-Sleep -Milliseconds ($attempt * 150)
      continue
    }
    return $true
  }
  return $false
}

# Returns the path of 7z.exe as reported by Get-Command, or $null when the
# command cannot be found.
function Get7z(){
  $cmd = Get-Command 7z.exe -ErrorAction SilentlyContinue
  if(-not $cmd){ return $null }
  return $cmd.Source
}

# Looks up the year for a media file from the JSON files that sit in the same
# directory.
#
# On the first request for a directory every *.json file in it is parsed once
# and the result is stored in $JsonYearCache (directory -> name -> year).
# The timestamp is taken from photoTakenTime.timestamp, falling back to
# creationTime.timestamp, read as Unix seconds and converted to local time.
#
# Returns the year as a string, or $null when nothing matches or any error
# occurs (errors are swallowed on purpose: this is a best-effort lookup).
function TryGetGoogleJsonTakenYear([string]$mediaPath){
  try{
    $folder   = Split-Path -Parent $mediaPath
    $fileName = Split-Path -Leaf   $mediaPath

    if(-not $JsonYearCache.ContainsKey($folder)){
      $yearByName = @{}
      $sidecars = Get-ChildItem -LiteralPath $folder -Filter *.json -File -ErrorAction SilentlyContinue

      foreach($sidecar in $sidecars){
        try{
          $meta = Get-Content -LiteralPath $sidecar.FullName -Raw | ConvertFrom-Json

          # First populated timestamp wins: photoTakenTime, then creationTime.
          $epoch = $null
          foreach($field in 'photoTakenTime','creationTime'){
            $node = $meta.$field
            if($node -and $node.timestamp){
              $epoch = $node.timestamp
              break
            }
          }
          if(-not $epoch){ continue }

          $taken     = [DateTimeOffset]::FromUnixTimeSeconds([int64]$epoch).LocalDateTime
          $takenYear = $taken.Year.ToString()

          # JSON name examples:
          #   IMG_1234.jpg.json                        -> key "IMG_1234.jpg"
          #   IMG_1234.jpg.supplemental-metadata.json  -> key "IMG_1234.jpg"
          $mediaKey = $sidecar.BaseName -replace '\.supplemental-metadata$',''

          # Key including the media extension; a later JSON overwrites it.
          $yearByName[$mediaKey] = $takenYear

          # Key without the media extension; only added when not present yet.
          $stemKey = [IO.Path]::GetFileNameWithoutExtension($mediaKey)
          if($stemKey -and -not $yearByName.ContainsKey($stemKey)){
            $yearByName[$stemKey] = $takenYear
          }
        } catch {}
      }

      $JsonYearCache[$folder] = $yearByName
    }

    $years = $JsonYearCache[$folder]

    # Full file name first (keys such as IMG_1234.jpg) ...
    if($years.ContainsKey($fileName)){ return $years[$fileName] }

    # ... then the name without its extension.
    $stem = [IO.Path]::GetFileNameWithoutExtension($fileName)
    if($years.ContainsKey($stem)){ return $years[$stem] }

  } catch {}

  return $null
}

# Reads the EXIF capture date of a JPG/JPEG file.
#
# Tries tag 0x9003 (DateTimeOriginal) first, then 0x0132 (DateTime). EXIF
# stores these as ASCII text in the form "yyyy:MM:dd HH:mm:ss", so the value
# is parsed with that exact format and the invariant culture.
#
# Returns a [DateTime], or $null when the file is not a JPG/JPEG, carries no
# usable date tag, or cannot be read. The image and stream are always released.
function TryExifDate([string]$path){
  $ext=[IO.Path]::GetExtension($path).ToLower()
  if($ext -ne ".jpg" -and $ext -ne ".jpeg"){ return $null }

  $ms  = $null
  $img = $null
  try{
    Add-Type -AssemblyName System.Drawing -ErrorAction SilentlyContinue | Out-Null
    $bytes = [System.IO.File]::ReadAllBytes($path)
    $ms  = New-Object System.IO.MemoryStream(,$bytes)
    $img = [System.Drawing.Image]::FromStream($ms)

    # Do not call the loop variable $pid: $PID is a read-only automatic
    # variable and assigning to it throws.
    foreach($tagId in 0x9003, 0x0132){
      if($img.PropertyIdList -notcontains $tagId){ continue }

      $item = $img.GetPropertyItem($tagId)
      $text = ([Text.Encoding]::ASCII.GetString($item.Value)).Trim([char]0).Trim()
      if(-not $text){ continue }

      $parsed = [DateTime]::MinValue
      $ok = [DateTime]::TryParseExact(
        $text,
        'yyyy:MM:dd HH:mm:ss',
        [Globalization.CultureInfo]::InvariantCulture,
        [Globalization.DateTimeStyles]::None,
        [ref]$parsed)
      if($ok){ return $parsed }
      # Unparsable value (e.g. all zeros): fall through to the next tag.
    }

    return $null
  } catch {
    return $null
  } finally {
    if($img){ $img.Dispose() }
    if($ms){ $ms.Dispose() }
  }
}

# Chooses the year (as a string) used to file $fi, in priority order:
#   1. year from the Takeout JSON next to the file,
#   2. year of the EXIF date,
#   3. year of the file's LastWriteTime.
function YearForFile($fi){
  $fromJson = TryGetGoogleJsonTakenYear $fi.FullName
  if(-not $fromJson){
    $exifDate = TryExifDate $fi.FullName
    if($exifDate){ return $exifDate.Year.ToString() }

    return $fi.LastWriteTime.Year.ToString()
  }
  return $fromJson
}

# Moves $src to $dst, making up to $maxTry attempts.
# Each failed attempt is followed by a pause of 200 ms times the attempt number.
# Returns $true on the first successful move, otherwise $false.
function MoveWithRetry([string]$src,[string]$dst,[int]$maxTry=6){
  $attempt = 1
  while($attempt -le $maxTry){
    $moved = $true
    try{
      Move-Item -LiteralPath $src -Destination $dst -ErrorAction Stop
    } catch {
      $moved = $false
    }
    if($moved){ return $true }

    Start-Sleep -Milliseconds (200 * $attempt)
    $attempt++
  }
  return $false
}

# Copies $src to $dst, making up to $maxTry attempts.
# Each failed attempt is followed by a pause of 200 ms times the attempt number.
# Returns $true on the first successful copy, otherwise $false.
function CopyWithRetry([string]$src,[string]$dst,[int]$maxTry=6){
  $attempt = 0
  do{
    $attempt++
    if($attempt -gt $maxTry){ break }
    try{
      Copy-Item -LiteralPath $src -Destination $dst -ErrorAction Stop
      return $true
    } catch {
      Start-Sleep -Milliseconds (200 * $attempt)
    }
  } while($true)
  return $false
}

# Files one media file into "<destRoot>\<year>\" unless its content is already
# known.
#
# Parameters:
#   $src      - full path of the media file to place
#   $destRoot - output root; the year subfolder is created on demand
#   $hashSet  - hashtable of SHA256 hashes already placed (updated in place)
#   $hashDb   - hash DB path; not used here (new hashes are queued in
#               $HashBuffer and written out by FlushHashBuffer)
#   $badLog   - file that receives one tab-separated line per skipped file
#
# Every failure is logged and the file is skipped; nothing is thrown.
# All file-system calls take the path literally, so names containing "[" or
# "]" are not misread as wildcard patterns.
function PlaceUnique([string]$src,[string]$destRoot,[hashtable]$hashSet,[string]$hashDb,[string]$badLog){

  # hash with robust skip
  $h = $null
  try{
    $h = (Get-FileHash -Algorithm SHA256 -LiteralPath $src -ErrorAction Stop).Hash
  } catch {
    Log ("WARN: Get-FileHash failed. Skipping: {0}" -f $src)
    AppendTextWithRetry $badLog ("HASH_FAIL`t{0}`t{1}" -f $src, $_.Exception.Message) | Out-Null
    return
  }

  if([string]::IsNullOrWhiteSpace($h)){
    Log ("WARN: Null/empty hash. Skipping: {0}" -f $src)
    AppendTextWithRetry $badLog ("HASH_NULL`t{0}" -f $src) | Out-Null
    return
  }

  if($hashSet.ContainsKey($h)){
    if($VerboseLog){ Log ("Duplicate skipped: {0}" -f $src) }
    return
  }

  # The file object feeds the year lookup; without it no destination can be
  # computed, so treat a failure here like any other bad file.
  $fi = $null
  try{
    $fi = Get-Item -LiteralPath $src -ErrorAction Stop
  } catch {
    Log ("WARN: Get-Item failed. Skipping: {0}" -f $src)
    AppendTextWithRetry $badLog ("STAT_FAIL`t{0}`t{1}" -f $src, $_.Exception.Message) | Out-Null
    return
  }
  $year = YearForFile $fi

  $yearDir = Join-Path $destRoot $year
  EnsureDir $yearDir

  $base = [IO.Path]::GetFileName($src)
  $dst  = Join-Path $yearDir $base

  # collision-safe name: same name but different content gets "__<n>" appended
  if(Test-Path -LiteralPath $dst){
    $name = [IO.Path]::GetFileNameWithoutExtension($base)
    $ext  = [IO.Path]::GetExtension($base)
    $k=1
    do{
      $dst = Join-Path $yearDir ("{0}__{1}{2}" -f $name,$k,$ext)
      $k++
    } while(Test-Path -LiteralPath $dst)
  }

  $ok = MoveWithRetry $src $dst
  if(-not $ok){
    if($VerboseLog){ Log ("Move failed; trying copy+delete: {0}" -f $src) }
    $ok2 = CopyWithRetry $src $dst
    if(-not $ok2){
      Log ("WARN: Move/Copy failed. Skipping: {0}" -f $src)
      AppendTextWithRetry $badLog ("MOVE_COPY_FAIL`t{0}" -f $src) | Out-Null
      return
    }
    Remove-Item -LiteralPath $src -Force -ErrorAction SilentlyContinue
  }

  # Record the hash only once the file is safely at its destination.
  $hashSet[$h]=1
  $HashBuffer.Add($h) | Out-Null
}

# Appends every hash queued in $HashBuffer to the file $hashDb, one per line.
# The buffer is emptied only when the write succeeded, so hashes that could
# not be written stay queued for the next flush.
function FlushHashBuffer([string]$hashDb){
  if($HashBuffer.Count -gt 0){
    $written = AppendTextWithRetry $hashDb ($HashBuffer -join "`n")
    if(-not $written){
      Log "WARN: Failed to flush hash buffer after retries."
      return
    }
    $HashBuffer.Clear()
  }
}

# Disk-space guard run before each batch.
#
# Returns $curBatch unchanged when the drive of $anyPath has at least $minGB
# free. Otherwise it steps through the smaller sizes 6 and 4 (only those below
# $curBatch), logs each one, measures free space again, and returns that size
# if the new measurement meets $minGB. Throws when no step succeeds.
#
# Note: choosing a smaller size frees nothing by itself; a step succeeds only
# if the re-measured free space is at or above $minGB.
function AdjustBatchIfLowSpace([string]$anyPath,[int]$minGB,[int]$curBatch){
  $initialFree = FreeGB $anyPath
  if($initialFree -lt $minGB){
    foreach($smaller in 6,4){
      if($curBatch -le $smaller){ continue }

      Log ("Low disk space: {0}GB < {1}GB. Trying BatchSize={2}." -f $initialFree,$minGB,$smaller)
      if((FreeGB $anyPath) -ge $minGB){ return $smaller }
    }
    throw ("Not enough free space: {0}GB (need >= {1}GB). Stop." -f $initialFree,$minGB)
  }
  return $curBatch
}

# ---------- init ----------
# Resolve ZipDir literally (so "[" / "]" in the path are not read as
# wildcards) and stop right away if it does not exist: every later path is
# derived from it.
$ZipDir   = (Resolve-Path -LiteralPath $ZipDir -ErrorAction Stop).Path
EnsureDir $DestRoot

# Processed ZIPs are parked here unless -DeleteZips is given.
$ProcessedDir = Join-Path $ZipDir "_processed"
EnsureDir $ProcessedDir

# Parent of the per-batch extraction folders.
$WorkRoot = Join-Path $ZipDir "_work"
EnsureDir $WorkRoot

$HashDb  = Join-Path $DestRoot "_hashes_sha256.txt"
$BadLog  = Join-Path $DestRoot "_bad_files.txt"
if(-not (Test-Path -LiteralPath $HashDb)){ New-Item -ItemType File -Path $HashDb | Out-Null }
if(-not (Test-Path -LiteralPath $BadLog)){ New-Item -ItemType File -Path $BadLog | Out-Null }

# load existing hashes (one per line; blank lines ignored)
Log "Loading hash DB..."
$HashSet=@{}
Get-Content -LiteralPath $HashDb -ErrorAction SilentlyContinue | ForEach-Object {
  $t=$_.Trim()
  if($t.Length -gt 0){ $HashSet[$t]=1 }
}
Log ("Loaded hashes: {0}" -f $HashSet.Count)

$SevenZip = Get7z
if($SevenZip){ Log ("Using 7-Zip: {0}" -f $SevenZip) }
else { Log "7-Zip not found. Using Expand-Archive (slow)." }

# @() keeps $zips an array even when zero or one ZIP is found, so the
# .Count check and the range indexing in the batch loop behave uniformly.
$zips = @(Get-ChildItem -LiteralPath $ZipDir -File -Filter *.zip | Sort-Object Name)
if($zips.Count -eq 0){ throw "No ZIP files found in ZipDir." }

Log ("ZIP files found: {0}" -f $zips.Count)

# ---------- batch loop ----------
# Each pass: check disk space, extract up to $curBatch ZIPs into a fresh work
# folder, file the media found there, flush new hashes, retire the ZIPs and
# remove the work folder.
$idx = 0
$curBatch = $BatchSize

while($idx -lt $zips.Count){

  $curBatch = AdjustBatchIfLowSpace $ZipDir $MinFreeGB $curBatch

  $end = [Math]::Min($idx + $curBatch - 1, $zips.Count - 1)
  $batch = @($zips[$idx..$end])

  $batchId = ("{0:0000}_{1}" -f ($idx+1),(Get-Date -Format "yyyyMMdd_HHmmss"))
  $work = Join-Path $WorkRoot $batchId
  EnsureDir $work

  Log ("Batch start: {0} (ZIPs={1}, MinFreeGB={2})" -f $batchId,$batch.Count,$MinFreeGB)

  # ZIPs whose extraction failed in this batch (FullName -> $true). They are
  # neither moved nor deleted afterwards, so no source data is lost.
  $failedZips = @{}

  # extract each zip into its own subfolder under batch work
  foreach($zip in $batch){
    $sub = Join-Path $work ([IO.Path]::GetFileNameWithoutExtension($zip.Name))
    EnsureDir $sub
    Log ("Extract: {0}" -f $zip.Name)

    $extractErr = $null
    if($SevenZip){
      # Pass plain values: PowerShell quotes arguments containing spaces
      # itself, and embedded literal quotes would become part of the path
      # under newer native-argument passing modes.
      & $SevenZip x $zip.FullName "-o$sub" -y | Out-Null
      if($LASTEXITCODE -ne 0){
        $extractErr = ("7-Zip exit code {0}" -f $LASTEXITCODE)
      }
    } else {
      try{
        Expand-Archive -LiteralPath $zip.FullName -DestinationPath $sub -Force -ErrorAction Stop
      } catch {
        $extractErr = $_.Exception.Message
      }
    }

    if($extractErr){
      $failedZips[$zip.FullName] = $true
      Log ("WARN: Extraction failed; ZIP left in place: {0} ({1})" -f $zip.Name,$extractErr)
      AppendTextWithRetry $BadLog ("EXTRACT_FAIL`t{0}`t{1}" -f $zip.FullName,$extractErr) | Out-Null
      # Drop the partial output so truncated files are not filed as media.
      Remove-Item -LiteralPath $sub -Recurse -Force -ErrorAction SilentlyContinue
    }
  }

  # scan media only under this batch work folder
  Log "Scanning media files..."
  $files = @(Get-ChildItem -LiteralPath $work -Recurse -File | Where-Object { $MediaExt -contains $_.Extension.ToLower() })
  Log ("Media files found: {0}" -f $files.Count)

  foreach($f in $files){
    PlaceUnique $f.FullName $DestRoot $HashSet $HashDb $BadLog
  }

  FlushHashBuffer $HashDb

  Log ("Batch done. Total hashes now: {0}" -f $HashSet.Count)

  # move/delete processed zips (only those that extracted cleanly)
  foreach($zip in $batch){
    if($failedZips.ContainsKey($zip.FullName)){
      Log ("Keep ZIP (extraction failed): {0}" -f $zip.Name)
      continue
    }

    if($DeleteZips){
      Log ("Delete ZIP: {0}" -f $zip.Name)
      Remove-Item -LiteralPath $zip.FullName -Force -ErrorAction SilentlyContinue
    } else {
      Log ("Move ZIP to _processed: {0}" -f $zip.Name)
      Move-Item -LiteralPath $zip.FullName -Destination (Join-Path $ProcessedDir $zip.Name) -Force
    }
  }

  if(-not $KeepWork){
    Log ("Remove temp dir: {0}" -f $work)
    Remove-Item -LiteralPath $work -Recurse -Force -ErrorAction SilentlyContinue
  } else {
    Log ("Keep temp dir: {0}" -f $work)
  }

  $idx += $curBatch

  Log ("Free space now: {0} GB" -f (FreeGB $ZipDir))
  Log "----------------------------------------"
}

# Closing summary: where the results, the hash DB and the logs ended up.
Log "All done."
$summaryLines = @(
  @("Output root (LOCAL): {0}", $DestRoot),
  @("Hash DB: {0}",             $HashDb),
  @("Bad file log: {0}",        $BadLog)
)
# The _processed folder is only relevant when ZIPs were kept.
if(-not $DeleteZips){
  $summaryLines += ,@("Processed ZIPs: {0}", $ProcessedDir)
}
foreach($entry in $summaryLines){
  Log ($entry[0] -f $entry[1])
}
