-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpyrex
More file actions
executable file
·272 lines (248 loc) · 11.7 KB
/
pyrex
File metadata and controls
executable file
·272 lines (248 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#!/usr/bin/python3
from tzip import *
import os, sys, gc
from stat import *
import argparse
import zipfile
import zlib
import struct
import pickle
import tempfile
import traceback
import xml.etree.ElementTree as ET
from multiprocessing import Pool
# Recursive directory lister
# Equivalent to 'find . -type f'
def list_all_files(src_list):
    """Recursively expand a list of paths into a flat list of files.

    Equivalent to ``find . -type f`` applied to every entry of *src_list*.

    Args:
        src_list: Iterable of file or directory paths.

    Returns:
        A list of ``(path, size, mtime)`` tuples, one for every
        non-directory entry reachable from *src_list*.  Entries that cannot
        be stat'ed and directories that cannot be listed are skipped.
    """
    filelist = []
    for src in src_list:
        try:
            srcstat = os.stat(src)
        except (OSError, ValueError):
            # Missing, dangling or otherwise unusable path: ignore it.
            continue
        if not S_ISDIR(srcstat.st_mode):
            filelist.append((src, srcstat.st_size, srcstat.st_mtime))
            continue
        try:
            children = os.listdir(src)
        except OSError:
            # Unreadable directory: skip it like any other bad entry
            # instead of aborting the whole walk.
            continue
        filelist.extend(list_all_files([src + "/" + name for name in children]))
    return filelist
# Creates a dictionary of (crc, size) for every file in a list,
# mapping them to the file associated to the crc and size
# CRC collisions are easy, but linking it with the file size should
# mitigate the problem. I suppose we could go to SHA1. Yeah, right.
def collect_size_crc(src, cachepath):
    """Index every source ROM by ``(crc32, size)``.

    Each source file is first opened as a zip archive regardless of its
    name; every member is indexed.  Files that are not zip archives are
    indexed as a single uncompressed ROM (so 7zips are effectively ignored).

    Args:
        src: List of source files or directories, expanded with
            ``list_all_files``.
        cachepath: Optional path of a pickle file caching the per-source
            results, keyed by path and validated by size and mtime.  A falsy
            value disables caching.

    Returns:
        A dict mapping ``(crc32, size)`` to ``(base, name)``.  For a zip
        member, *base* is the archive path and *name* the member name; for a
        loose file, *base* is ``None`` and *name* is the file path.
    """
    files = {}
    savecache = False
    cache = None
    if cachepath:
        try:
            # NOTE: pickle can execute arbitrary code while loading; only
            # point the cache option at files this tool wrote itself.
            # Pickles are binary data, so the file must be opened in 'rb'.
            with open(cachepath, 'rb') as cachefile:
                cache = pickle.load(cachefile)
            if not isinstance(cache, dict):
                raise ValueError("unexpected cache content")
        except Exception:
            # Missing, unreadable or corrupt cache: start a fresh one.
            cache = {}
            savecache = True

    filelist = list_all_files(src)
    total = len(filelist)
    for count, (f, f_size, f_mtime) in enumerate(filelist, 1):
        print('\rReading source {0}/{1}'.format(count, total), file=sys.stderr, end="")
        # I'm not sure this caching is useful.
        if cache is not None:
            entry = cache.get(f)
            if entry is not None and entry["size"] == f_size and entry["mtime"] == f_mtime:
                # Cache hit: reuse the recorded ROMs without touching the file.
                for crc32, size, base, name in entry["roms"]:
                    filekey = (crc32, size)
                    if filekey not in files:
                        files[filekey] = (base, name)
                continue
            # Unknown or stale entry: rebuild it from scratch.
            cache[f] = {"size": f_size, "mtime": f_mtime, "roms": []}
            savecache = True
        try:
            # Don't trust the filename. Treat it like a zip until it stops
            # looking like a zip.
            with zipfile.ZipFile(f, 'r') as srczip:
                for info in srczip.infolist():
                    if cache is not None:
                        cache[f]["roms"].append((info.CRC, info.file_size, f, info.filename))
                    filekey = (info.CRC, info.file_size)
                    if filekey not in files:
                        files[filekey] = (f, info.filename)
        except zipfile.BadZipFile:
            # OK, I guess that wasn't a zipfile. Treat it as if it were
            # uncompressed. This means 7zips will be ignored.
            try:
                crc32 = 0
                size = 0
                with open(f, 'rb') as rawfile:
                    # Checksum in chunks so large files are never held in
                    # memory all at once.
                    for chunk in iter(lambda: rawfile.read(1 << 20), b""):
                        crc32 = zlib.crc32(chunk, crc32)
                        size += len(chunk)
            except OSError:
                print("Bad file " + f + ", skipping", file=sys.stderr)
                continue
            if cache is not None:
                cache[f]["roms"].append((crc32, size, None, f))
            files[(crc32, size)] = (None, f)
        except OSError:
            # The file vanished or cannot be opened at all.
            print("Bad file " + f + ", skipping", file=sys.stderr)
    # Terminate the '\r' progress line.
    print("", file=sys.stderr)

    if savecache and cache is not None and cachepath:
        with open(cachepath, 'wb') as cachefile:
            pickle.dump(cache, cachefile, pickle.HIGHEST_PROTOCOL)
    return files
# Walks a datfile, checking if the entry has a matching file.
# Outputs a list of games, each game with a list of roms, in order.
def find_files_for_dat(dat, srcs, cachepath):
    """Match every ROM listed in a datfile against the available sources.

    Only merged sets are produced: the ROMs of a machine carrying a
    ``cloneof`` attribute are filed under the parent's name.

    Args:
        dat: Path (or file object) of the XML datfile.
        srcs: List of source files or directories.
        cachepath: Optional source-cache path, passed to ``collect_size_crc``.

    Returns:
        A list of ``{"machine": name, "roms": [...]}`` dicts.  Each rom is a
        dict with keys ``name``, ``size``, ``crc``, ``base`` and ``file``;
        ``base``/``file`` are ``None`` when no source matched.  Roms are
        sorted case-insensitively by name.  The list is empty when the root
        element is not ``datafile``.
    """
    srclist = collect_size_crc(srcs, cachepath)
    datroot = ET.parse(dat).getroot()
    newgames = {}
    if datroot.tag == "datafile":
        # Looking for "machine" or "game". Any others?
        # This is where the listxml support would need to come in.
        for machine in datroot:
            if machine.tag not in ("machine", "game"):
                continue
            machinename = machine.attrib["name"].strip()
            mname = machinename
            clonename = None
            if "cloneof" in machine.attrib:
                clonename = machinename
                mname = machine.attrib["cloneof"].strip()
            # Here's where only merged sets are handled.
            # Could easily be fixed to handle split or non-merged sets
            newroms = newgames.setdefault(mname, {})
            for rom in machine:
                if rom.tag != "rom":
                    continue
                # Ignore nodump. Any others?
                if rom.attrib.get("status") == "nodump":
                    continue
                # How can we screw up the rom filename, let me count the ways
                # Datfiles use backslashes, zips use forward slashes
                # Datfiles put spaces before and after filenames. Spaces!
                romname = "/".join(
                    part.strip()
                    for part in rom.attrib["name"].replace("\\", "/").split("/")
                )
                # Datfiles will add a period to the end of the filename
                # but when the zip is created, that dot is gone.
                # rstrip() also copes with names made only of dots.
                romname = romname.rstrip('.')
                # Datfiles will use both base 10 and base 16 values for size
                try:
                    romsize = int(rom.attrib["size"])
                except ValueError:
                    romsize = int(rom.attrib["size"], base=16)
                # At least CRCs are normal
                romcrc = int(rom.attrib["crc"], base=16)
                # Check for duplicate filenames. This is important for merged sets
                lowered = romname.lower()
                romdups = [x for x in newroms if x.lower() == lowered]
                dupfound = False
                # NOTE(review): namefixed is never set to True anywhere, so
                # the rename below runs for every differing duplicate and an
                # exact duplicate always causes a skip.  Confirm the intent
                # before changing it.
                namefixed = False
                for dupname in romdups:
                    dup = newroms[dupname]
                    if dup["size"] == romsize and dup["crc"] == romcrc:
                        # Yup, dup name and file. Just skip it.  Report the
                        # entry that actually matched; the first candidate
                        # may already have been renamed away.
                        print("In game {0},".format(machine.attrib["name"]), file=sys.stderr, end="")
                        print("skipping {0}({1}:{2:08X})".format(romname, romsize, romcrc), file=sys.stderr, end="")
                        print("as duplicate of {0}({1}:{2:08X})".format(dupname, dup["size"], dup["crc"]), file=sys.stderr)
                        dupfound = True
                    else:
                        # Dup filename, but not file content. Add a path element
                        # to the romname to make it "unique"
                        if dup["clonename"] is not None:
                            # This is a dup in another clone. Rename the other
                            # clone as well
                            newromname = dup["clonename"] + '/' + dupname
                            print("Renaming {0} to {1}".format(dupname, newromname))
                            newroms[newromname] = dup
                            dup["clonename"] = None
                            del newroms[dupname]
                        if not namefixed:
                            # Now you're unique, just like everyone else
                            print("Saving {0} in {1}".format(romname, machinename), end="")
                            romname = machinename + '/' + romname
                            print("to {0} in {1}".format(romname, mname))
                if dupfound and not namefixed:
                    continue
                base, member = srclist.get((romcrc, romsize), (None, None))
                newroms[romname] = {
                    "size": romsize,
                    "crc": romcrc,
                    "base": base,
                    "file": member,
                    "clonename": clonename,
                }

    # Flatten the name -> rom mapping of every game into a sorted list.
    result = []
    for gamename, gameroms in newgames.items():
        romlist = [
            {
                "name": name,
                "size": entry["size"],
                "crc": entry["crc"],
                "base": entry["base"],
                "file": entry["file"],
            }
            for name, entry in gameroms.items()
        ]
        romlist.sort(key=lambda entry: entry["name"].lower())
        result.append({"machine": gamename, "roms": romlist})
    return result
# Take a game with a list of roms and build a torrentzip.
# Using the zipfile module is not possible, as there is not enough control
# over the compression type and dictionary content to properly create a
# torrentzip file.
def make_zips_from_game(dest, game):
    """Write ``<dest>/<machine>.zip`` for one game.

    Args:
        dest: Destination directory.
        game: Dict with keys ``machine`` (set name) and ``roms`` (list of
            rom dicts with ``name``, ``size``, ``crc``, ``base``, ``file``).
            Rom dicts may be updated in place when a missing rom is found in
            a previously existing zip.

    Returns:
        None.  Nothing is written when the game has no roms or when the zip
        already on disk lists exactly the expected roms in order.
    """
    mname = game["machine"]
    roms = game["roms"]
    if not roms:
        return
    mpath = os.path.join(dest, mname + '.zip')
    tmpdir = None
    tmpfile = None
    # If a file exists at the path of our future zipfile, then be polite and save
    # it. It might contain something useful.
    if os.path.exists(mpath):
        try:
            with zipfile.ZipFile(mpath, 'r') as srczip:
                zinflist = srczip.infolist()
            # If the zip on disk matches what we need to write, then skip this
            # file entirely. We're done.
            if len(roms) == len(zinflist) and all(
                rom["name"] == info.filename
                and rom["crc"] == info.CRC
                and rom["size"] == info.file_size
                for rom, info in zip(roms, zinflist)
            ):
                return
            # Move it to a temp directory and keep its data around. Maybe we
            # can use its contents.
            tmpdir = tempfile.mkdtemp(dir=dest)
            moved = os.path.join(tmpdir, mname + ".zip")
            os.rename(mpath, moved)
            # Only remember the file once the rename has really happened, so
            # the cleanup below never tries to remove a file that is not there.
            tmpfile = moved
            tmproms = {}
            with zipfile.ZipFile(tmpfile, 'r') as tmpzip:
                for tmprom in tmpzip.infolist():
                    tmproms[(tmprom.CRC, tmprom.file_size)] = tmprom.filename
            for rom in roms:
                # Well, whaddaya know. A rom was in the datfile, but we couldn't
                # find a match. Good thing we found one here or we would have
                # had an incomplete set. ;-)
                romkey = (rom["crc"], rom["size"])
                if rom["file"] is None and romkey in tmproms:
                    rom["base"] = tmpfile
                    rom["file"] = tmproms[romkey]
        except Exception:
            # Best effort only: an unreadable or non-zip file at mpath must
            # not prevent the new zip from being written.
            pass
    create_zip_from_files(mpath, roms)
    # Remove previous file if it was kept around.
    if tmpfile is not None:
        os.remove(tmpfile)
    if tmpdir is not None:
        os.rmdir(tmpdir)
# Helper class to allow us to multiprocess the zipfile writer
class ZipMaker(object):
    """Callable that builds the zip of one game into a fixed destination.

    An instance carries the destination directory so that it can be handed
    to ``Pool.map`` as a one-argument function.
    """

    def __init__(self, dest):
        # Destination directory shared by every game processed.
        self.dest = dest

    def __call__(self, game):
        """Build the zip for *game*; report and re-raise any failure."""
        try:
            make_zips_from_game(self.dest, game)
        except Exception:
            # Report on stderr from the worker, where the traceback is still
            # available, then let the error propagate to the caller.
            print("Error processing {0}".format(game["machine"]), file=sys.stderr)
            print(traceback.format_exc(), file=sys.stderr)
            raise
if __name__ == '__main__':
    # Command line: [-sc CACHE] dest dat src [src ...]
    aparse = argparse.ArgumentParser(description="Process TorrentZipped Romsets")
    aparse.add_argument('-sc', '--source-cache', help="Save source cache to file")
    aparse.add_argument('dest', help="Destination directory for romset")
    aparse.add_argument('dat', help="Romset DAT file")
    aparse.add_argument('src', nargs='+', help="Source files or directories")
    args = aparse.parse_args()

    # Create the destination, including any missing parent directories.
    os.makedirs(args.dest, exist_ok=True)

    # Start the workers only once the arguments are known to be valid, but
    # still before the source index is built in this process.  The context
    # manager shuts the pool down on every exit path.
    with Pool() as pool:
        print("Matching...", file=sys.stderr)
        games = find_files_for_dat(args.dat, [os.path.normpath(x) for x in args.src], args.source_cache)
        # Largest games first.
        games.sort(reverse=True, key=lambda m: sum(x["size"] for x in m["roms"]))
        print("Writing...", file=sys.stderr)
        pool.map(ZipMaker(args.dest), games, 2)