# stormbrigade_sheriff/scrape_hoh.py
#
# (Repository viewer metadata: 127 lines, 4.8 KiB, Python)

import requests
from bs4 import BeautifulSoup
import db
import sys
import motor
# Explicit display names for individual world ids. The scraping loop further
# down uses these in preference to the generated "<type name> <number>" form.
# Each row is (world id, full name, short name).
weird_worlds = {
    world_id: {"world_name": full_name, "short_name": abbreviation}
    for world_id, full_name, abbreviation in (
        ('1201', "Global Conflict 2", "GC2"),
        ('1250', "Global Realm 1", "GR1"),
        ('1200', "Global Conflict 1", "GC1"),
        ('1251', "Global Realm 2", "GR2"),
        ('1253', "Global Conflict 3", "GC3"),
        ('1254', "Global Realm 3", "GR3"),
        ('1255', "Global Conflict 4", "GC4"),
        ('1256', "Global Realm 4", "GR4"),
        ('1257', "Global Conflict 5", "GC5"),
        ('1258', "Global Realm 5", "GR5"),
        ('1259', "Global Realm 6", "GR6"),
        ('1260', "Global Conflict 6", "GC6"),
        ('1261', "Global Realm 7", "GR7"),
        ('1262', "Global Conflict 7", "GC7"),
        ('1263', "Global Realm 8", "GR8"),
        ('1264', "Global Conflict 8", "GC8"),
        ('2500', "Domination 1", "D1"),
        ('2501', "Domination 2", "D2"),
        ('2551', "Domination 3", "D3"),
        ('2502', "Domination 4", "D4"),
        ('2550', "Domination 5", "D5"),
    )
}
# Name stems per world type. The key is a world id with its last two digits
# removed (see `world_type` in the scraping loop); the loop appends a running
# number to both names. Each row is (type key, full-name stem, short-name stem).
world_types = {
    type_key: {'world_name': name_stem, 'short_name': short_stem}
    for type_key, name_stem, short_stem in (
        ('1', 'World', 'W'),
        ('2', 'Welt', 'De'),
        ('3', 'Monde', 'Fr'),
        ('4', 'Russia', 'Ru'),
        ('5', 'Mundo', 'Es'),
        ('6', 'Poland', 'Pl'),
        ('7', 'Europe', 'Eu'),
        ('8', 'Turkey', 'Tr'),
        # NOTE(review): 'Welt' repeats the type-2 name while the short name
        # is 'Us' -- looks like a copy-paste leftover; confirm before changing.
        ('9', 'Welt', 'Us'),
        ('10', 'Mondo', 'It'),
        ('11', 'Latin America', 'LA'),
        ('13', 'Island Warfare', 'IW'),
        ('14', 'China', 'Cn'),
        ('15', 'Kingmaker', 'Km'),
        ('16', 'Sea of Japan', 'Jp'),
    )
}
# Switch stdout to UTF-8 before anything is printed.
sys.stdout.reconfigure(encoding='utf-8')

# Fetch the Hall of Heroes index page. Its anchors of the form
# HoH.php?worldid=<id>&age=<n> are the source of the world list.
url = 'https://www.strongholdkingdoms.com/glory/HoH.php'
# The timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as data.
response = requests.get(url, timeout=30)
response.raise_for_status()
website = response.text
world_soup = BeautifulSoup(website, 'html.parser')
linka = world_soup.find_all('a')
links = []
unique_worlds = {}  # world id (str) -> document describing that world
type_counts = {}    # world type key -> world ids seen so far, in page order
#session = db.create_session()
for link in linka:
    linkStr = str(link.get('href'))
    worldID = linkStr.partition('HoH.php?worldid=')[2].partition('&age=')[0]
    ageID = linkStr.partition('&age=')[2]
    # Anchors that are not world links leave these parts empty or non-numeric.
    # Skip them so they neither pollute type_counts nor make int() raise below.
    if not (worldID.isdecimal() and ageID.isdecimal()):
        continue

    temp_dict = {}
    # The world type is the id with its last two digits removed.
    world_type = worldID[:-2]
    seen_ids = type_counts.setdefault(world_type, [])
    if worldID not in seen_ids:
        seen_ids.append(worldID)
    # Number the world by its own position among the worlds of its type.
    # Using the running length instead would renumber a world whose id shows
    # up again after later worlds of the same type were already recorded.
    ordinal = seen_ids.index(worldID) + 1

    if world_type in world_types:
        type_info = world_types[world_type]
        temp_dict['world_name'] = f"{type_info['world_name']} {ordinal}"
        temp_dict['short_name'] = f"{type_info['short_name']}{ordinal}"
    if worldID in weird_worlds:
        # An explicit per-world entry takes precedence over generated names.
        temp_dict['world_name'] = weird_worlds[worldID]['world_name']
        temp_dict['short_name'] = weird_worlds[worldID]['short_name']
    if temp_dict:
        # Age details are recorded only for worlds that could be named.
        temp_dict['age'] = int(ageID)
        # An age of '7' is what marks a world as ended.
        temp_dict['ended'] = ageID == '7'

    temp_dict['_id'] = int(worldID)
    temp_dict['hoh_scraped'] = False
    # A later link for the same world id replaces the earlier document.
    unique_worlds[worldID] = temp_dict
import asyncio
import os
import sqlite3
import pymongo

# SQLite connection settings
sqlite_db_file = "db.sqlite"
sqlite_table_name = "houses"

# MongoDB connection settings
# NOTE(review): the fallback URI below embeds a username and password in the
# source. Setting MONGODB_URI in the environment overrides it; the literal is
# kept only so existing runs behave as before -- consider rotating the
# credential and removing the default.
mongodb_uri = os.environ.get(
    "MONGODB_URI",
    "mongodb://sheriff:unnipus1213@10.0.0.21:27017/?retryWrites=true&serverSelectionTimeoutMS=5000&connectTimeoutMS=10000&authSource=stormbrigade&authMechanism=SCRAM-SHA-256",
)
mongodb_db_name = "stormbrigade"
mongodb_collection_name = "worlds"

# One document per distinct world id collected in unique_worlds.
documents = list(unique_worlds.values())

if documents:
    # Connect to MongoDB
    mongodb_client = pymongo.MongoClient(mongodb_uri)
    try:
        mongodb_collection = mongodb_client[mongodb_db_name][mongodb_collection_name]
        # Insert documents into MongoDB collection.
        # NOTE: every document carries an explicit _id (the world id), so this
        # raises if any of those ids is already present in the collection.
        result = mongodb_collection.insert_many(documents)
        print(f"Inserted {len(result.inserted_ids)} documents into MongoDB")
    finally:
        # Release the connection even when the insert fails.
        mongodb_client.close()
else:
    # insert_many() rejects an empty list, so skip the round trip entirely.
    print("No worlds found; nothing inserted into MongoDB")
print(documents)
#session.commit()
#session.close()