Text Extractor
The 3 PHP scripts below interact with OpenAI GPT to either describe an image or run intelligent character recognition (ICR) on it.
They are used by a custom PHPRunner text extraction system; click the Text Extractor link above for more info.

.env
OPENAI_API_KEY=sk-proj-abc123...Replace with your key
DB_HOST=localhost
DB_NAME=FileDatabase
DB_USER=admin
DB_PASS=adminpassword


gpt.php
<?php
/**
 * Load KEY=VALUE pairs from a dotenv-style file into $_ENV.
 *
 * Blank lines and lines whose first non-whitespace character is '#' are
 * ignored. Each remaining line is split on its first '=', so values may
 * themselves contain '='. Keys and values are trimmed of surrounding
 * whitespace. Lines without '=' or with an empty key are skipped rather
 * than stored under a bogus key.
 *
 * A missing or unreadable file is not an error: the function returns
 * without touching $_ENV.
 *
 * @param string $path Path to the .env file.
 * @return void
 */
function loadEnv($path) {
    if (!is_file($path) || !is_readable($path)) return;
    $lines = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    // file() returns false on a read failure; there is nothing to iterate then.
    if ($lines === false) return;
    foreach ($lines as $line) {
        $line = trim($line);
        // FILE_SKIP_EMPTY_LINES does not drop whitespace-only lines, so check here.
        if ($line === '' || $line[0] === '#') continue;
        $parts = explode('=', $line, 2);
        // No '=' on the line: not a KEY=VALUE pair.
        if (count($parts) !== 2) continue;
        $key = trim($parts[0]);
        if ($key === '') continue;
        $_ENV[$key] = trim($parts[1]);
    }
}

// Populate $_ENV from the .env file that sits next to this script.
loadEnv(__DIR__ . '/.env');

// Database settings; only the host has a non-empty fallback.
$dbHost = $_ENV['DB_HOST'] ?? 'localhost';
$dbName = $_ENV['DB_NAME'] ?? '';
$dbUser = $_ENV['DB_USER'] ?? '';
$dbPass = $_ENV['DB_PASS'] ?? '';

// OpenAI credential.
$apiKey = $_ENV['OPENAI_API_KEY'] ?? '';

// The API key, database name and database user must all be set;
// the password is allowed to be empty.
if (!($apiKey && $dbName && $dbUser)) {
    http_response_code(500);
    exit("Missing required environment variables.");
}

/**
 * Ask the OpenAI chat completions API to describe the image at a URL.
 *
 * Failures are reported through the return value, not by throwing: the
 * caller always receives a string.
 *
 * @param string $imageUrl URL of the image; forwarded unchanged to the API.
 * @param string $apiKey   OpenAI API key, sent as a Bearer token.
 * @return string The model's reply on success. Otherwise a message that
 *                starts with "API Error: " or the literal
 *                "Unexpected response format.".
 */
function describeImageFromUrl($imageUrl, $apiKey) {
    $payload = [
        "model" => "gpt-4-turbo",
        "messages" => [[
            "role" => "user",
            "content" => [
                ["type" => "image_url", "image_url" => ["url" => $imageUrl]],
                ["type" => "text", "text" => "Describe this image in detail."]
            ]
        ]],
        "max_tokens" => 500
    ];

    // json_encode() returns false when the payload cannot be encoded
    // (for example, invalid UTF-8 in the URL); do not send a broken body.
    $body = json_encode($payload);
    if ($body === false) {
        return "API Error: could not encode request (" . json_last_error_msg() . ")";
    }

    $ch = curl_init("https://api.openai.com/v1/chat/completions");
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        // Without timeouts a stalled connection would block this request indefinitely.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 120,
        CURLOPT_HTTPHEADER => [
            "Content-Type: application/json",
            "Authorization: Bearer $apiKey"
        ],
        CURLOPT_POSTFIELDS => $body
    ]);
    $response = curl_exec($ch);
    // Read error details before the handle is closed.
    $curlError = curl_error($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    // curl_exec() returns false on transport failures (DNS, TLS, timeout, ...).
    if ($response === false) {
        return "API Error: " . ($curlError !== '' ? $curlError : "request failed");
    }

    $result = json_decode($response, true);
    if ($httpCode === 200 && isset($result['choices'][0]['message']['content'])) {
        return $result['choices'][0]['message']['content'];
    } elseif (isset($result['error']['message']) && is_string($result['error']['message'])) {
        return "API Error: " . $result['error']['message'];
    } else {
        return "Unexpected response format.";
    }
}

/**
 * Store the generated description in the FileText column of one filelist row.
 *
 * Opens a new MySQL connection per call. On any database failure the
 * script ends with HTTP 500; the detailed driver message goes to the PHP
 * error log rather than to the HTTP client, because it can contain
 * connection details such as the database user and host.
 *
 * @param int    $recordId    Value matched against filelist.RecordID.
 * @param string $description Text written to filelist.FileText.
 * @param string $dbHost      MySQL host.
 * @param string $dbName      Database name.
 * @param string $dbUser      Database user.
 * @param string $dbPass      Database password.
 * @return void
 */
function updateRecordDescription($recordId, $description, $dbHost, $dbName, $dbUser, $dbPass) {
    try {
        $pdo = new PDO(
            "mysql:host=$dbHost;dbname=$dbName;charset=utf8mb4",
            $dbUser,
            $dbPass,
            // Before PHP 8 the default error mode is silent, which would let a
            // failed prepare()/execute() go unnoticed by the catch below.
            [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
        );
        $stmt = $pdo->prepare("UPDATE filelist SET FileText = ? WHERE RecordID = ?");
        $stmt->execute([$description, $recordId]);
    } catch (PDOException $e) {
        error_log("updateRecordDescription failed: " . $e->getMessage());
        http_response_code(500);
        exit("Database error.");
    }
}

// Main logic
// Both query parameters are required.
if (!isset($_GET['image_url']) || !isset($_GET['recordid'])) {
    http_response_code(400);
    exit("Missing image_url or recordid parameter.");
}

// PHP turns `image_url[]=...` into an array, which trim() cannot handle,
// and a blank URL would only produce a failing API call.
if (!is_string($_GET['image_url']) || trim($_GET['image_url']) === '') {
    http_response_code(400);
    exit("Invalid image_url parameter.");
}

// intval() maps non-numeric input to 0 instead of rejecting it; require
// a real integer so a malformed id never reaches the UPDATE.
$recordId = filter_var($_GET['recordid'], FILTER_VALIDATE_INT);
if ($recordId === false) {
    http_response_code(400);
    exit("Invalid recordid parameter.");
}

$imageUrl = trim($_GET['image_url']);

// describeImageFromUrl() reports failures as ordinary strings, so whatever
// it returns (description or error message) is what gets stored.
$description = describeImageFromUrl($imageUrl, $apiKey);
updateRecordDescription($recordId, $description, $dbHost, $dbName, $dbUser, $dbPass);

exit;
?>

gptocr.php
<?php
/**
 * Load KEY=VALUE pairs from a dotenv-style file into $_ENV.
 *
 * Blank lines and lines whose first non-whitespace character is '#' are
 * ignored. Each remaining line is split on its first '=', so values may
 * themselves contain '='. Keys and values are trimmed of surrounding
 * whitespace. Lines without '=' or with an empty key are skipped rather
 * than stored under a bogus key.
 *
 * A missing or unreadable file is not an error: the function returns
 * without touching $_ENV.
 *
 * @param string $path Path to the .env file.
 * @return void
 */
function loadEnv($path) {
    if (!is_file($path) || !is_readable($path)) return;
    $lines = file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    // file() returns false on a read failure; there is nothing to iterate then.
    if ($lines === false) return;
    foreach ($lines as $line) {
        $line = trim($line);
        // FILE_SKIP_EMPTY_LINES does not drop whitespace-only lines, so check here.
        if ($line === '' || $line[0] === '#') continue;
        $parts = explode('=', $line, 2);
        // No '=' on the line: not a KEY=VALUE pair.
        if (count($parts) !== 2) continue;
        $key = trim($parts[0]);
        if ($key === '') continue;
        $_ENV[$key] = trim($parts[1]);
    }
}

// Populate $_ENV from the .env file that sits next to this script.
loadEnv(__DIR__ . '/.env');

// Database settings; only the host has a non-empty fallback.
$dbHost = $_ENV['DB_HOST'] ?? 'localhost';
$dbName = $_ENV['DB_NAME'] ?? '';
$dbUser = $_ENV['DB_USER'] ?? '';
$dbPass = $_ENV['DB_PASS'] ?? '';

// OpenAI credential.
$apiKey = $_ENV['OPENAI_API_KEY'] ?? '';

// The API key, database name and database user must all be set;
// the password is allowed to be empty.
if (!($apiKey && $dbName && $dbUser)) {
    http_response_code(500);
    exit("Missing required environment variables.");
}

/**
 * Ask the OpenAI chat completions API to extract the visible text from the
 * image at a URL.
 *
 * Despite its name, this variant prompts for text extraction rather than a
 * description. Failures are reported through the return value, not by
 * throwing: the caller always receives a string.
 *
 * @param string $imageUrl URL of the image; forwarded unchanged to the API.
 * @param string $apiKey   OpenAI API key, sent as a Bearer token.
 * @return string The model's reply on success. Otherwise a message that
 *                starts with "API Error: " or the literal
 *                "Unexpected response format.".
 */
function describeImageFromUrl($imageUrl, $apiKey) {
    $payload = [
        "model" => "gpt-4-turbo",
        "messages" => [[
            "role" => "user",
            "content" => [
                ["type" => "image_url", "image_url" => ["url" => $imageUrl]],
                ["type" => "text", "text" => "Extract all visible text from this image. Return only the text exactly as it appears, preserving line breaks, spacing, and punctuation. Do not summarize or describe the image."]
            ]
        ]],
        "max_tokens" => 1500
    ];

    // json_encode() returns false when the payload cannot be encoded
    // (for example, invalid UTF-8 in the URL); do not send a broken body.
    $body = json_encode($payload);
    if ($body === false) {
        return "API Error: could not encode request (" . json_last_error_msg() . ")";
    }

    $ch = curl_init("https://api.openai.com/v1/chat/completions");
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        // Without timeouts a stalled connection would block this request indefinitely.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 120,
        CURLOPT_HTTPHEADER => [
            "Content-Type: application/json",
            "Authorization: Bearer $apiKey"
        ],
        CURLOPT_POSTFIELDS => $body
    ]);
    $response = curl_exec($ch);
    // Read error details before the handle is closed.
    $curlError = curl_error($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    // curl_exec() returns false on transport failures (DNS, TLS, timeout, ...).
    if ($response === false) {
        return "API Error: " . ($curlError !== '' ? $curlError : "request failed");
    }

    $result = json_decode($response, true);
    if ($httpCode === 200 && isset($result['choices'][0]['message']['content'])) {
        return $result['choices'][0]['message']['content'];
    } elseif (isset($result['error']['message']) && is_string($result['error']['message'])) {
        return "API Error: " . $result['error']['message'];
    } else {
        return "Unexpected response format.";
    }
}

/**
 * Store the extracted text in the FileText column of one filelist row.
 *
 * Opens a new MySQL connection per call. On any database failure the
 * script ends with HTTP 500; the detailed driver message goes to the PHP
 * error log rather than to the HTTP client, because it can contain
 * connection details such as the database user and host.
 *
 * @param int    $recordId    Value matched against filelist.RecordID.
 * @param string $description Text written to filelist.FileText.
 * @param string $dbHost      MySQL host.
 * @param string $dbName      Database name.
 * @param string $dbUser      Database user.
 * @param string $dbPass      Database password.
 * @return void
 */
function updateRecordDescription($recordId, $description, $dbHost, $dbName, $dbUser, $dbPass) {
    try {
        $pdo = new PDO(
            "mysql:host=$dbHost;dbname=$dbName;charset=utf8mb4",
            $dbUser,
            $dbPass,
            // Before PHP 8 the default error mode is silent, which would let a
            // failed prepare()/execute() go unnoticed by the catch below.
            [PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION]
        );
        $stmt = $pdo->prepare("UPDATE filelist SET FileText = ? WHERE RecordID = ?");
        $stmt->execute([$description, $recordId]);
    } catch (PDOException $e) {
        error_log("updateRecordDescription failed: " . $e->getMessage());
        http_response_code(500);
        exit("Database error.");
    }
}

// Main logic
// Both query parameters are required.
if (!isset($_GET['image_url']) || !isset($_GET['recordid'])) {
    http_response_code(400);
    exit("Missing image_url or recordid parameter.");
}

// PHP turns `image_url[]=...` into an array, which trim() cannot handle,
// and a blank URL would only produce a failing API call.
if (!is_string($_GET['image_url']) || trim($_GET['image_url']) === '') {
    http_response_code(400);
    exit("Invalid image_url parameter.");
}

// intval() maps non-numeric input to 0 instead of rejecting it; require
// a real integer so a malformed id never reaches the UPDATE.
$recordId = filter_var($_GET['recordid'], FILTER_VALIDATE_INT);
if ($recordId === false) {
    http_response_code(400);
    exit("Invalid recordid parameter.");
}

$imageUrl = trim($_GET['image_url']);

// describeImageFromUrl() reports failures as ordinary strings, so whatever
// it returns (extracted text or error message) is what gets stored.
$description = describeImageFromUrl($imageUrl, $apiKey);
updateRecordDescription($recordId, $description, $dbHost, $dbName, $dbUser, $dbPass);

exit;
?>