feat: specs research agent reports corrected model number

When Tavily search results reveal that the OCR'd model number has a
character error, the specs_research prompt asks the LLM to output a
CORRECTED_MODEL_NUMBER line. The agent parses it out, stores it in the
job output, and DraftArticleHandler applies it to the article in
preference to the raw vision value.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Simon Kuehn 2026-05-18 11:00:41 +00:00
parent 4515911b27
commit 5b2a200fc2
4 changed files with 95 additions and 7 deletions

View file

@ -0,0 +1,63 @@
<?php
declare(strict_types=1);
namespace DoctrineMigrations;
use Doctrine\DBAL\Schema\Schema;
use Doctrine\Migrations\AbstractMigration;
/**
 * Rewrites the body of the "specs_research" prompt template.
 *
 * up() stores a prompt that additionally asks for a CORRECTED_MODEL_NUMBER
 * line; down() stores the same prompt without that instruction.
 */
final class Version20260520050000 extends AbstractMigration
{
    public function getDescription(): string
    {
        return 'Update specs_research prompt to report corrected model numbers';
    }

    /**
     * Stores the prompt variant that includes the CORRECTED_MODEL_NUMBER instruction.
     */
    public function up(Schema $schema): void
    {
        // Nowdoc (single-quoted marker): the {{placeholders}} are stored verbatim,
        // PHP performs no interpolation on them.
        $promptWithCorrection = <<<'PROMPT'
You are a hardware specifications expert. Extract the technical specifications for the {{articleType}}: "{{subject}}".
Web search results:
{{searchResults}}
Based on the search results above, list all technical specifications including:
processor, RAM, storage variants, display size and resolution, GPU, battery capacity,
ports, connectivity, weight, dimensions, OS, and any other relevant specs.
Be specific and accurate. If a spec is not found in the search results, omit it rather than guessing.
If the search results reveal that the model number in "{{subject}}" contains an OCR error
(e.g. a letter misread as a digit), output the corrected model number on the very first line
in exactly this format, then leave a blank line before the specs:
CORRECTED_MODEL_NUMBER: <corrected number>
If the model number is correct or no model number was provided, omit the CORRECTED_MODEL_NUMBER line entirely.
PROMPT;

        $this->replaceSpecsResearchPrompt($promptWithCorrection);
    }

    /**
     * Stores the prompt variant without the CORRECTED_MODEL_NUMBER instruction.
     */
    public function down(Schema $schema): void
    {
        $promptWithoutCorrection = <<<'PROMPT'
You are a hardware specifications expert. Extract the technical specifications for the {{articleType}}: "{{subject}}".
Web search results:
{{searchResults}}
Based on the search results above, list all technical specifications including:
processor, RAM, storage variants, display size and resolution, GPU, battery capacity,
ports, connectivity, weight, dimensions, OS, and any other relevant specs.
Be specific and accurate. If a spec is not found in the search results, omit it rather than guessing.
PROMPT;

        $this->replaceSpecsResearchPrompt($promptWithoutCorrection);
    }

    /**
     * Queues the parameterised UPDATE that overwrites the body of the
     * "specs_research" row and refreshes its updated_at column.
     */
    private function replaceSpecsResearchPrompt(string $promptBody): void
    {
        $this->addSql(
            'UPDATE app.prompt_templates SET body = :body, updated_at = NOW() WHERE key = :key',
            [
                'body' => $promptBody,
                'key' => 'specs_research',
            ],
        );
    }
}

View file

@ -18,7 +18,10 @@ final class SpecsResearchAgent
) { ) {
} }
public function research(string $modelName, string $articleTypeName, string $manufacturer = ''): string /**
* @return array{specsText: string, correctedModelNumber: string}
*/
public function research(string $modelName, string $articleTypeName, string $manufacturer = ''): array
{ {
$subject = trim(($manufacturer !== '' ? $manufacturer.' ' : '').$modelName); $subject = trim(($manufacturer !== '' ? $manufacturer.' ' : '').$modelName);
@ -36,6 +39,23 @@ final class SpecsResearchAgent
throw new \RuntimeException("No specifications found for model: {$modelName}"); throw new \RuntimeException("No specifications found for model: {$modelName}");
} }
return $result; return $this->parseResponse($result);
}
/**
 * Splits the raw LLM answer into the specs text and an optional corrected model number.
 *
 * A line of the form "CORRECTED_MODEL_NUMBER: <value>" is looked for at the start of
 * any line. The first whitespace-delimited token after the colon on that same line
 * is returned as the corrected model number, and every such marker line is removed
 * from the specs text.
 *
 * @param string $raw Unprocessed text returned by the LLM.
 *
 * @return array{specsText: string, correctedModelNumber: string}
 *         correctedModelNumber is '' when no marker line is present or the marker
 *         carries no value.
 */
private function parseResponse(string $raw): array
{
    $correctedModelNumber = '';

    // Only spaces/tabs may separate the colon from the value. "\s*" would also
    // consume the line break, so a marker without a value would capture the first
    // word of the following specs line as the "corrected" model number.
    // "\S*" (not "\S+") lets a valueless marker still match so the line is stripped.
    if (1 === preg_match('/^CORRECTED_MODEL_NUMBER:[ \t]*(\S*)/m', $raw, $matches)) {
        $correctedModelNumber = $matches[1];

        // Strip the marker line(s) from the specs text; keep the input on a PCRE error.
        $raw = preg_replace('/^CORRECTED_MODEL_NUMBER:[^\n]*\n?/m', '', $raw) ?? $raw;
    }

    return [
        'specsText' => trim($raw),
        'correctedModelNumber' => $correctedModelNumber,
    ];
}
} }

View file

@ -67,8 +67,10 @@ final class DraftArticleHandler
if (isset($vision['manufacturer']) && '' !== $vision['manufacturer']) { if (isset($vision['manufacturer']) && '' !== $vision['manufacturer']) {
$article->setManufacturer((string) $vision['manufacturer']); $article->setManufacturer((string) $vision['manufacturer']);
} }
if (isset($vision['modelNumber']) && '' !== $vision['modelNumber']) { $correctedModelNumber = (string) ($job->getOutputData()['specs_research']['correctedModelNumber'] ?? '');
$article->setModelNumber((string) $vision['modelNumber']); $modelNumber = $correctedModelNumber !== '' ? $correctedModelNumber : (string) ($vision['modelNumber'] ?? '');
if ('' !== $modelNumber) {
$article->setModelNumber($modelNumber);
} }
if (isset($vision['modelName']) && '' !== $vision['modelName']) { if (isset($vision['modelName']) && '' !== $vision['modelName']) {
$article->setModelName((string) $vision['modelName']); $article->setModelName((string) $vision['modelName']);

View file

@ -45,7 +45,7 @@ final class SpecsResearchHandler
$searchSubject = implode(' ', $parts); $searchSubject = implode(' ', $parts);
try { try {
$specsText = $this->specsAgent->research( $result = $this->specsAgent->research(
$searchSubject, $searchSubject,
$articleType->getName(), $articleType->getName(),
$message->manufacturer, $message->manufacturer,
@ -57,13 +57,16 @@ final class SpecsResearchHandler
return; return;
} }
$job->recordStep('specs_research', ['specsText' => $specsText]); $job->recordStep('specs_research', [
'specsText' => $result['specsText'],
'correctedModelNumber' => $result['correctedModelNumber'],
]);
$this->jobRepository->save($job); $this->jobRepository->save($job);
$this->bus->dispatch(new JsonCodingMessage( $this->bus->dispatch(new JsonCodingMessage(
jobId: $message->jobId, jobId: $message->jobId,
articleTypeId: $message->articleTypeId, articleTypeId: $message->articleTypeId,
specsText: $specsText, specsText: $result['specsText'],
serialNumber: $message->serialNumber, serialNumber: $message->serialNumber,
)); ));
} }