diff --git a/coauthornetwork/pipelines.py b/coauthornetwork/pipelines.py
index aebd51a..1025772 100644
--- a/coauthornetwork/pipelines.py
+++ b/coauthornetwork/pipelines.py
@@ -18,6 +18,9 @@ def __init__(self, mongo_uri, mongo_db):
         self.mongo_db = mongo_db
         self.db = None
         self.client = None
+        # Documents waiting to be written; flushed in batches of batch_size.
+        self.items = []
+        self.batch_size = 10
 
     @classmethod
     def from_crawler(cls, crawler):
@@ -33,10 +36,33 @@ def open_spider(self, spider):
 
     def process_item(self, item, spider):
+        """Queue item and bulk-insert once batch_size documents are buffered.
+
+        Returns item unchanged; raises DropItem if buffering or the insert fails.
+        """
         try:
-            self.db[self.collection_name].insert_one(dict(item))
+            # Copy to a plain dict, as insert_one(dict(item)) did before;
+            # pymongo adds an "_id" key to every document it inserts.
+            self.items.append(dict(item))
+            if len(self.items) >= self.batch_size:
+                self._flush_items()
             return item
         except Exception:
             raise DropItem()
 
-    def close_spider(self, item, spider):
-        self.client.close()
\ No newline at end of file
+    def close_spider(self, item, spider=None):
+        """Flush the final partial batch, then close the MongoDB client.
+
+        spider defaults to None because Scrapy calls close_spider(spider)
+        with a single positional argument.
+        """
+        try:
+            self._flush_items()
+        finally:
+            self.client.close()
+
+    def _flush_items(self):
+        """Bulk-insert all buffered documents and empty the buffer."""
+        # Detach the batch first so a failing batch is not retried forever.
+        batch, self.items = self.items, []
+        if batch:  # insert_many rejects an empty list
+            self.db[self.collection_name].insert_many(batch)