Git pushed after crawling #1
This commit is contained in:
parent
8d5a669d9e
commit
5b0a9bfbe2
File diff suppressed because it is too large
Load Diff
12475
spacetime-crawler4py-master/crawler/q1.txt
Normal file
12475
spacetime-crawler4py-master/crawler/q1.txt
Normal file
File diff suppressed because it is too large
Load Diff
1
spacetime-crawler4py-master/crawler/q2.txt
Normal file
1
spacetime-crawler4py-master/crawler/q2.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
Longest Page: http://www.ics.uci.edu/~cs224, length: 83259
|
50
spacetime-crawler4py-master/crawler/q3.txt
Normal file
50
spacetime-crawler4py-master/crawler/q3.txt
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
research: 71407
|
||||||
|
computer: 44358
|
||||||
|
science: 35764
|
||||||
|
ics: 31878
|
||||||
|
students: 31271
|
||||||
|
uci: 30946
|
||||||
|
events: 28911
|
||||||
|
news: 28680
|
||||||
|
student: 28244
|
||||||
|
information: 28159
|
||||||
|
informatics: 27680
|
||||||
|
graduate: 27322
|
||||||
|
0: 26001
|
||||||
|
school: 25154
|
||||||
|
2021: 24609
|
||||||
|
bren: 24296
|
||||||
|
data: 23332
|
||||||
|
us: 22961
|
||||||
|
undergraduate: 22912
|
||||||
|
faculty: 22357
|
||||||
|
2020: 22133
|
||||||
|
software: 22105
|
||||||
|
learning: 21218
|
||||||
|
policies: 20976
|
||||||
|
1: 19559
|
||||||
|
contact: 18653
|
||||||
|
2018: 17102
|
||||||
|
alumni: 17032
|
||||||
|
2: 16758
|
||||||
|
donald: 16690
|
||||||
|
projects: 16319
|
||||||
|
2019: 15778
|
||||||
|
computing: 15414
|
||||||
|
people: 15237
|
||||||
|
irvine: 15146
|
||||||
|
academic: 15127
|
||||||
|
support: 14680
|
||||||
|
2017: 14599
|
||||||
|
view: 14582
|
||||||
|
2016: 14330
|
||||||
|
ramesh: 14140
|
||||||
|
engineering: 13971
|
||||||
|
university: 13744
|
||||||
|
may: 13308
|
||||||
|
sciences: 13175
|
||||||
|
systems: 13164
|
||||||
|
course: 12868
|
||||||
|
statistics: 12582
|
||||||
|
media: 12577
|
||||||
|
new: 12501
|
86
spacetime-crawler4py-master/crawler/q4.txt
Normal file
86
spacetime-crawler4py-master/crawler/q4.txt
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
http://Transformativeplay.ics.uci.edu, 1 +
|
||||||
|
http://accessibility.ics.uci.edu, 1 +
|
||||||
|
http://acoi.ics.uci.edu, 52 +
|
||||||
|
http://aiclub.ics.uci.edu, 1 +
|
||||||
|
http://archive.ics.uci.edu, 6 +
|
||||||
|
http://asterix.ics.uci.edu, 7 +
|
||||||
|
http://cbcl.ics.uci.edu, 23 +
|
||||||
|
http://cert.ics.uci.edu, 5 +
|
||||||
|
http://checkmate.ics.uci.edu, 1 +
|
||||||
|
http://chenli.ics.uci.edu, 9 +
|
||||||
|
http://cloudberry.ics.uci.edu, 45 +
|
||||||
|
http://cml.ics.uci.edu, 172 +
|
||||||
|
http://code.ics.uci.edu, 12 +
|
||||||
|
http://computableplant.ics.uci.edu, 33 +
|
||||||
|
http://cradl.ics.uci.edu, 20 +
|
||||||
|
http://create.ics.uci.edu, 6 +
|
||||||
|
http://cwicsocal18.ics.uci.edu, 12 +
|
||||||
|
http://cyberclub.ics.uci.edu, 14 +
|
||||||
|
http://dgillen.ics.uci.edu, 19 +
|
||||||
|
http://duttgroup.ics.uci.edu, 85 +
|
||||||
|
http://elms.ics.uci.edu, 1 +
|
||||||
|
http://emj.ics.uci.edu, 45 +
|
||||||
|
http://evoke.ics.uci.edu, 62 +
|
||||||
|
http://flamingo.ics.uci.edu, 11 +
|
||||||
|
http://fr.ics.uci.edu, 3 +
|
||||||
|
http://frost.ics.uci.edu, 1 +
|
||||||
|
http://futurehealth.ics.uci.edu, 72 +
|
||||||
|
http://graphics.ics.uci.edu, 4 +
|
||||||
|
http://hack.ics.uci.edu, 1 +
|
||||||
|
http://hai.ics.uci.edu, 3 +
|
||||||
|
http://helpdesk.ics.uci.edu, 3 +
|
||||||
|
http://hobbes.ics.uci.edu, 1 +
|
||||||
|
http://i-sensorium.ics.uci.edu, 1 +
|
||||||
|
http://iasl.ics.uci.edu, 17 +
|
||||||
|
http://industryshowcase.ics.uci.edu, 23 +
|
||||||
|
http://www.informatics.ics.uci.edu, 1 +
|
||||||
|
http://intranet.ics.uci.edu, 2 +
|
||||||
|
http://ipf.ics.uci.edu, 2 +
|
||||||
|
http://ipubmed.ics.uci.edu, 1 +
|
||||||
|
http://isg.ics.uci.edu, 104 +
|
||||||
|
http://jgarcia.ics.uci.edu, 23 +
|
||||||
|
http://luci.ics.uci.edu, 4 +
|
||||||
|
http://malek.ics.uci.edu, 1 +
|
||||||
|
http://mcs.ics.uci.edu, 31 +
|
||||||
|
http://mdogucu.ics.uci.edu, 1 +
|
||||||
|
http://mds.ics.uci.edu, 11 +
|
||||||
|
http://mhcid.ics.uci.edu, 16 +
|
||||||
|
http://mondego.ics.uci.edu, 3 +
|
||||||
|
http://motifmap.ics.uci.edu, 2 +
|
||||||
|
http://mse.ics.uci.edu, 2 +
|
||||||
|
http://mswe.ics.uci.edu, 16 +
|
||||||
|
http://mt-live.ics.uci.edu, 1634 +
|
||||||
|
http://nalini.ics.uci.edu, 7 +
|
||||||
|
http://ngs.ics.uci.edu, 2000 +
|
||||||
|
http://perennialpolycultures.ics.uci.edu, 1 +
|
||||||
|
http://plrg.ics.uci.edu, 14 +
|
||||||
|
http://psearch.ics.uci.edu, 1 +
|
||||||
|
http://radicle.ics.uci.edu, 1 +
|
||||||
|
http://redmiles.ics.uci.edu, 4 +
|
||||||
|
http://riscit.ics.uci.edu, 1 +
|
||||||
|
http://sconce.ics.uci.edu, 2 +
|
||||||
|
http://sdcl.ics.uci.edu, 205 +
|
||||||
|
http://seal.ics.uci.edu, 6 +
|
||||||
|
http://sherlock.ics.uci.edu, 7 +
|
||||||
|
http://sli.ics.uci.edu, 338 +
|
||||||
|
http://sourcerer.ics.uci.edu, 1 +
|
||||||
|
http://sprout.ics.uci.edu, 2 +
|
||||||
|
http://stairs.ics.uci.edu, 4 +
|
||||||
|
http://statconsulting.ics.uci.edu, 5 +
|
||||||
|
http://student-council.ics.uci.edu, 1 +
|
||||||
|
http://studentcouncil.ics.uci.edu, 3 +
|
||||||
|
http://support.ics.uci.edu, 4 +
|
||||||
|
http://swiki.ics.uci.edu, 42 +
|
||||||
|
http://tad.ics.uci.edu, 3 +
|
||||||
|
http://tastier.ics.uci.edu, 1 +
|
||||||
|
http://tippers.ics.uci.edu, 1 +
|
||||||
|
http://tippersweb.ics.uci.edu, 5 +
|
||||||
|
http://transformativeplay.ics.uci.edu, 58 +
|
||||||
|
http://tutors.ics.uci.edu, 44 +
|
||||||
|
http://ugradforms.ics.uci.edu, 3 +
|
||||||
|
http://unite.ics.uci.edu, 10 +
|
||||||
|
http://vision.ics.uci.edu, 200 +
|
||||||
|
http://wearablegames.ics.uci.edu, 11 +
|
||||||
|
http://wics.ics.uci.edu, 970 +
|
||||||
|
http://www-db.ics.uci.edu, 10 +
|
||||||
|
http://xtune.ics.uci.edu, 6 +
|
BIN
spacetime-crawler4py-master/frontier.shelve
Normal file
BIN
spacetime-crawler4py-master/frontier.shelve
Normal file
Binary file not shown.
432620
spacetime-crawler4py-master/invalid_links.txt
Normal file
432620
spacetime-crawler4py-master/invalid_links.txt
Normal file
File diff suppressed because it is too large
Load Diff
541261
spacetime-crawler4py-master/valid_links.txt
Normal file
541261
spacetime-crawler4py-master/valid_links.txt
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user