-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawlers.php
executable file
·97 lines (92 loc) · 4.13 KB
/
crawlers.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
<?php
use App\Common\MessageBus\Factories\MessageBusFactory;
use App\Common\MessageBus\Factories\WorkerFactory;
use App\Common\MessageBus\Handlers\Crawlers\GithubContributorsCrawler;
use App\Common\MessageBus\Handlers\Crawlers\GithubFollowersCrawler;
use App\Common\MessageBus\Handlers\Crawlers\GithubProfileCrawler;
use App\Common\MessageBus\Handlers\Crawlers\PageFetcherCrawler;
use App\Common\MessageBus\Handlers\Crawlers\WebFeedFetcherCrawler;
use App\Common\MessageBus\Messages\Crawlers\GithubContributorsToCrawlMessage;
use App\Common\MessageBus\Messages\Crawlers\GithubFollowersToCrawlMessage;
use App\Common\MessageBus\Messages\Crawlers\NewGithubProfileToCrawlMessage;
use App\Common\MessageBus\Messages\Crawlers\NewWebsiteToCrawlMessage;
use App\Common\MessageBus\Messages\Crawlers\WebFeedToCrawlMessage;
use App\Common\Services\HttpClient;
use App\Common\Services\Github\GithubApiFetcher;
use App\Common\Services\Website\WebsiteFetcher;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Symfony\Component\Messenger\Handler\HandlerDescriptor;
use Symfony\Component\Messenger\Transport\RedisExt\RedisReceiver;
use Symfony\Component\Messenger\Transport\RedisExt\RedisTransport;
// This entry point is a long-running queue consumer; refuse to start under any non-CLI SAPI.
if ('cli' !== PHP_SAPI) {
    throw new \Exception('The script is only for cli');
}

// NOTE(review): this path is not anchored to __DIR__, so PHP resolves it through
// include_path / the working directory, while the container file further down is
// loaded relative to __DIR__. Confirm the script is always launched from the
// directory that contains vendor/.
require_once 'vendor/autoload.php';

// $argv[0] is the script itself, so the consumer name must arrive as the first real argument.
if ($argc < 2) {
    throw new \Exception('consumer name is required');
}

// Publish the consumer name through the process environment; the handler wiring
// later in this script reads it back with getenv('REDIS_QUEUE_CONSUMER').
\putenv('REDIS_QUEUE_CONSUMER=' . $argv[1]);
// Load the application container; the path is anchored to this script's directory.
/** @var \Slim\Container $container */
$container = require __DIR__ . '/../config/container.php';

/** @var EventDispatcherInterface $dispatcher */
$dispatcher = $container->get(EventDispatcherInterface::class);

// The value fetched here is handed to RedisReceiver as its first constructor argument,
// which Symfony Messenger type-hints as the RedisExt Connection (not a RedisTransport).
/** @var \Symfony\Component\Messenger\Transport\RedisExt\Connection $connection */
$connection = $container->get(CONTAINER_CONFIG_REDIS_STREAM_CONNECTION_CRAWLERS);

// Single receiver for the crawlers stream, keyed by the crawlers transport constant.
// NOTE(review): the key is presumably the transport name that the handlers'
// 'from_transport' options refer to — confirm against WorkerFactory.
$receivers = [
    CONTAINER_CONFIG_REDIS_STREAM_TRANSPORT_CRAWLERS => new RedisReceiver(
        $connection,
        $container->get(CONTAINER_CONFIG_REDIS_STREAM_SERIALIZER)
    ),
];

// Fetchers shared by the crawler handlers. Each gets its own HttpClient with a
// distinct identification string (presumably sent as the User-Agent — confirm in HttpClient).
// getenv() returns false when a variable is not set; the raw value is passed through as-is.
$websiteFetcher = new WebsiteFetcher(
    new HttpClient('hpdb-bot-c/0.1 (+https://hpdb.ru/crawler)'),
    \getenv('DAEMONS_WEBSITE_FETCHER_MAX_SIZE_BYTES')
);
$apiFetcher = new GithubApiFetcher(
    new HttpClient('hpdb-bot-a/0.1 (+https://hpdb.ru/crawler)'),
    \getenv('GITHUB_API_AUTH')
);

// Outgoing buses that the crawler handlers are given for dispatching follow-up messages.
/** @var \Symfony\Component\Messenger\MessageBusInterface $persistorBus */
$persistorBus = $container->get(CONTAINER_CONFIG_REDIS_STREAM_PERSISTORS);
/** @var \Symfony\Component\Messenger\MessageBusInterface $processorsBus */
$processorsBus = $container->get(CONTAINER_CONFIG_REDIS_STREAM_PROCESSORS);
// Collects handler registrations; the message bus is built from it afterwards.
$factory = new MessageBusFactory($container);

// Every crawler handler receives the same consumer name, so read it from the environment once.
$consumerName = \getenv('REDIS_QUEUE_CONSUMER');

// add only /crawlers handlers
// Each registration maps one message class to one crawler and restricts it,
// via 'from_transport', to the transport declared by that crawler's TRANSPORT constant.
$factory
    ->addHandler(
        NewWebsiteToCrawlMessage::class,
        new HandlerDescriptor(
            new PageFetcherCrawler($consumerName, $websiteFetcher, $persistorBus),
            ['from_transport' => PageFetcherCrawler::TRANSPORT]
        )
    )
    ->addHandler(
        WebFeedToCrawlMessage::class,
        new HandlerDescriptor(
            new WebFeedFetcherCrawler($consumerName, $websiteFetcher, $processorsBus),
            ['from_transport' => WebFeedFetcherCrawler::TRANSPORT]
        )
    )
    ->addHandler(
        NewGithubProfileToCrawlMessage::class,
        new HandlerDescriptor(
            new GithubProfileCrawler($consumerName, $apiFetcher, $processorsBus),
            ['from_transport' => GithubProfileCrawler::TRANSPORT]
        )
    )
    ->addHandler(
        GithubFollowersToCrawlMessage::class,
        new HandlerDescriptor(
            new GithubFollowersCrawler($consumerName, $apiFetcher, $processorsBus),
            ['from_transport' => GithubFollowersCrawler::TRANSPORT]
        )
    )
    ->addHandler(
        GithubContributorsToCrawlMessage::class,
        new HandlerDescriptor(
            // The contributors crawler is the only handler given both outgoing buses.
            new GithubContributorsCrawler($consumerName, $apiFetcher, $processorsBus, $persistorBus),
            ['from_transport' => GithubContributorsCrawler::TRANSPORT]
        )
    );
// Turn the registered handlers into a message bus, then hand it to the worker
// together with the receivers, logger, metrics service and event dispatcher.
$messageBus = $factory->buildMessageBus();
$worker = WorkerFactory::createExceptionHandlingWorker(
    $receivers,
    $messageBus,
    $container->get(CONTAINER_CONFIG_LOGGER),
    $container->get(CONTAINER_CONFIG_METRICS),
    $dispatcher
);

// The factory is no longer needed once the bus exists; drop the reference before running.
unset($factory);

// NOTE(review): run() presumably blocks while consuming messages — confirm in WorkerFactory.
$worker->run();