-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsteam_comments.py
153 lines (131 loc) · 4.88 KB
/
steam_comments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import sys
import requests
import re
from bs4 import BeautifulSoup
from datetime import datetime
def extract_workshop_details(url):
    """Fetch a Steam Workshop page and extract what is needed to list its comments.

    Args:
        url: Workshop item URL. It must carry an ``id=<digits>`` query
            parameter identifying the item.

    Returns:
        A ``(workshop_id, owner_id, title)`` tuple of strings.

    Exits the process with status 1 (after printing a message) when the URL
    has no item id, the page cannot be fetched, or the owner id / title
    cannot be found in the page.
    """
    # Validate the URL before touching the network. Anchoring on "?" / "&"
    # keeps parameters such as "appid=" from being mistaken for "id=".
    id_match = re.search(r"[?&]id=(\d+)", url)
    if not id_match:
        print("Invalid URL format. Make sure it includes 'id=' followed by the Workshop item ID.")
        sys.exit(1)
    workshop_id = id_match.group(1)

    # Fetch the Workshop page HTML. A timeout prevents hanging forever, and
    # connection-level failures are reported the same way as a bad status.
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        response = None
    if response is None or response.status_code != 200:
        print("Failed to fetch the Workshop page. Please check the URL.")
        sys.exit(1)

    soup = BeautifulSoup(response.content, "html.parser")

    # The owner id is looked up inside inline <script> content, as a
    # '"owner": "<digits>"' pair; the first script that contains it wins.
    owner_pattern = re.compile(r'"owner":\s*"(\d+)"')
    owner_id = None
    for script in soup.find_all("script"):
        # script.string is None for empty tags or tags with several children.
        owner_match = owner_pattern.search(script.string or "")
        if owner_match:
            owner_id = owner_match.group(1)
            break
    if not owner_id:
        print("Failed to extract owner_id from the Workshop page.")
        sys.exit(1)

    # Extract title using the workshopItemTitle class
    title_tag = soup.find("div", class_="workshopItemTitle")
    if not title_tag:
        print("Failed to extract title from the Workshop page.")
        sys.exit(1)
    title = title_tag.text.strip()

    return workshop_id, owner_id, title
def fetch_comments(workshop_id, owner_id, batch_size=100):
    """Download every comment page of a Workshop item as raw HTML fragments.

    Args:
        workshop_id: Workshop item id, as used in the item URL.
        owner_id: Id of the item's owner, as found in the Workshop page.
        batch_size: Number of comments requested per POST (default 100, to
            keep the number of requests low).

    Returns:
        A list of HTML strings, one per fetched batch, in request order.
        If a request fails part-way, the batches fetched so far are returned.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (the paging offset
            would never advance).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # The endpoint is the same for every page; only the POST data changes.
    url = f"https://steamcommunity.com/comment/PublishedFile_Public/render/{owner_id}/{workshop_id}/"
    all_comments = []
    start = 0
    while True:
        data = {
            "start": start,
            "totalcount": 0,
            "count": batch_size,
        }
        try:
            response = requests.post(url, data=data, timeout=30)
            response.raise_for_status()
            json_data = response.json()
        except (requests.RequestException, ValueError) as error:
            # ValueError covers a non-JSON body on older requests versions.
            print(f"Failed to fetch comments starting at {start}: {error}")
            break

        # Extract the HTML content of the comments. A whitespace-only
        # fragment is treated as empty so the loop cannot spin on it.
        comments_html = json_data.get("comments_html", "")
        if not comments_html or not comments_html.strip():
            print("No more comments found. Exiting loop.")
            break
        all_comments.append(comments_html)
        start += batch_size

        # NOTE(review): the response appears to carry a "total_count" field;
        # when it is present, stop as soon as the offset passes it instead of
        # issuing one more request. Confirm against a live response.
        total_count = json_data.get("total_count")
        if isinstance(total_count, int) and start >= total_count:
            print("No more comments found. Exiting loop.")
            break

    print(f"Total comments fetched: {len(all_comments)} batches.")
    return all_comments
def create_html_file(comments, title):
    """Write the fetched comment fragments into one standalone HTML file.

    The file is created in the current working directory and named
    ``"<title> - All Comments - <YYYYMMDD HH_MM>.html"``, with characters
    that are not allowed in file names replaced by ``-``.

    Args:
        comments: Iterable of HTML strings (one per fetched batch); they are
            inserted into the page body unmodified, in order.
        title: Workshop item title, used only to build the file name.
    """
    # Format the current date and time
    current_time = datetime.now().strftime("%Y%m%d %H_%M")

    # Workshop titles are free text: replace every character that is invalid
    # in a file name on common platforms (not just ":"), so that open() does
    # not fail or write into an unintended directory.
    unsafe_chars = str.maketrans({char: "-" for char in '\\/:*?"<>|'})
    filename = f"{title} - All Comments - {current_time}.html".translate(unsafe_chars)
    print(f"Creating HTML file: {filename}")

    # Page header with CSS for the side-by-side avatar layout. The charset
    # declaration matches the UTF-8 encoding used when writing the file, so
    # non-ASCII comments render correctly when the file is opened locally.
    html_header = """<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Workshop Comments</title>
<style>
body {
    background-color: #1b2838;
    color: #c7d5e0;
    font-family: Arial, Helvetica, sans-serif;
    padding: 20px;
}
h1 {
    color: #66c0f4;
}
.commentthread_comment {
    display: flex;
    background-color: #2a475e;
    border: 1px solid #3c5a77;
    padding: 10px;
    margin-bottom: 10px;
    border-radius: 4px;
    align-items: flex-start;
}
.commentthread_comment_avatar {
    flex-shrink: 0;
    margin-right: 10px;
}
.commentthread_comment_avatar img {
    width: 50px;
    height: 50px;
    border-radius: 4px; /* Square shape with rounded corners */
}
.comment_content {
    flex-grow: 1;
}
.commentthread_comment_author {
    font-weight: bold;
    color: #66c0f4;
}
.commentthread_comment_text {
    margin-top: 5px;
}
</style>
</head>
<body>
<h1>All Comments</h1>
"""
    # Join once instead of growing the string batch by batch.
    html_content = "".join([html_header, *comments, "</body></html>"])

    # Write the HTML content to the file
    with open(filename, "w", encoding="utf-8") as file:
        file.write(html_content)
    print(f"HTML file created successfully: {filename}")
def main(argv=None):
    """Run the command-line workflow: resolve the item, fetch comments, write HTML.

    Args:
        argv: Argument list in ``sys.argv`` form (program name first).
            Defaults to ``sys.argv`` when omitted.

    Exits with status 1 after printing a usage line when no Workshop URL
    is supplied.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        # Report the name the script was actually invoked with rather than a
        # hard-coded one that can drift from the real file name.
        program = args[0] if args else "steam_comments.py"
        print(f"Usage: python {program} <workshop_url>")
        sys.exit(1)

    workshop_url = args[1]
    workshop_id, owner_id, title = extract_workshop_details(workshop_url)
    print(f"Workshop ID: {workshop_id}, Owner ID: {owner_id}, Title: {title}")
    comments = fetch_comments(workshop_id, owner_id)
    create_html_file(comments, title)


if __name__ == "__main__":
    main()