Fully reworked the indexer and worker classes so that indexing works properly
This commit is contained in:
parent
53c7b49806
commit
c43d6aa0a9
142
indexer.py
142
indexer.py
@ -17,6 +17,7 @@ from bs4 import BeautifulSoup
|
|||||||
from time import perf_counter
|
from time import perf_counter
|
||||||
import time
|
import time
|
||||||
import threading
|
import threading
|
||||||
|
from threading import Lock
|
||||||
|
|
||||||
|
|
||||||
#Data process
|
#Data process
|
||||||
@ -34,43 +35,134 @@ from worker import Worker
|
|||||||
|
|
||||||
|
|
||||||
class Indexer():
|
class Indexer():
|
||||||
def __init__(self,restart):
|
def __init__(self,restart,list_partials,weight,data_paths,worker_factory=Worker):
|
||||||
#Config stuffs
|
#Config stuffs
|
||||||
self.path = "data/DEV/"
|
self.path = "test/"
|
||||||
self.restart = restart
|
self.restart = restart
|
||||||
|
self.list_partials = list_partials
|
||||||
|
self.weight = weight
|
||||||
|
self.data_paths = data_paths
|
||||||
|
self.data_paths_lock = Lock()
|
||||||
|
self.list_partials_lock = Lock()
|
||||||
|
self.workers = list()
|
||||||
|
self.worker_factory = worker_factory
|
||||||
|
|
||||||
def get_data(self):
|
def start_async(self):
|
||||||
num_threads = 1
|
self.workers = [
|
||||||
threads = list()
|
self.worker_factory(worker_id,self)
|
||||||
|
for worker_id in range(8)]
|
||||||
|
for worker in self.workers:
|
||||||
|
worker.start()
|
||||||
|
|
||||||
|
def start(self):
|
||||||
|
self.start_async()
|
||||||
|
self.join()
|
||||||
|
|
||||||
|
def join(self):
|
||||||
|
for worker in self.workers:
|
||||||
|
worker.join()
|
||||||
|
|
||||||
|
def get_data_path(self):
|
||||||
for directory in os.listdir(self.path):
|
for directory in os.listdir(self.path):
|
||||||
for file in os.listdir(self.path + "/" + directory + "/"):
|
for file in os.listdir(self.path + "/" + directory + "/"):
|
||||||
while True:
|
self.data_paths.append("data/DEV/" + directory + "/"+file)
|
||||||
file_path = self.path + "" + directory + "/"+file
|
|
||||||
if len(threads) < num_threads:
|
def get_next_file(self):
|
||||||
thread = Worker(self,file_path)
|
self.data_paths_lock.acquire()
|
||||||
threads.append(thread)
|
try:
|
||||||
thread.start()
|
holder = self.data_paths.pop()
|
||||||
break
|
self.data_paths_lock.release()
|
||||||
else:
|
return holder
|
||||||
if not threads[index].is_alive():
|
except IndexError:
|
||||||
threads[index] = Worker(self,file_path)
|
self.data_paths_lock.release()
|
||||||
threads[index].start()
|
return None
|
||||||
break
|
|
||||||
else:
|
def add_partial_index(self,partial_index):
|
||||||
index = index + 1
|
self.list_partials_lock.acquire()
|
||||||
if(index >= num_threads):
|
self.list_partials.append(partial_index)
|
||||||
index = 0
|
self.list_partials_lock.release()
|
||||||
time.sleep(.1)
|
|
||||||
|
|
||||||
#Found 55770 documents
|
#Found 55770 documents
|
||||||
#
|
#
|
||||||
|
|
||||||
#getting important tokens
|
#getting important tokens
|
||||||
|
|
||||||
|
def merge(self):
|
||||||
|
partial_files = list()
|
||||||
|
partial_index_files = list()
|
||||||
|
parital_index_indices = list()
|
||||||
|
merged_index = open("merged_index.full",'w')
|
||||||
|
num_indices = len(self.list_partials)
|
||||||
|
|
||||||
|
#Full Index.Index and Length
|
||||||
|
full_index = Index()
|
||||||
|
full_index.index = list()
|
||||||
|
full_index.length = 0
|
||||||
|
|
||||||
|
for partial_index in self.list_partials:
|
||||||
|
file = open(partial_index+'.partial','r')
|
||||||
|
partial_files.append(file)
|
||||||
|
index = open(partial_index+'.index','r')
|
||||||
|
partial_index_files.append(index)
|
||||||
|
|
||||||
|
for partial_index_file in partial_index_files:
|
||||||
|
partial_index_file.seek(0,0)
|
||||||
|
parital_index_indices.append(json.loads(partial_index_file.readline()))
|
||||||
|
|
||||||
|
#Start all indexes at 0
|
||||||
|
for partial_file in partial_files:
|
||||||
|
partial_file.seek(0,0)
|
||||||
|
|
||||||
|
pointers = [0]*num_indices
|
||||||
|
|
||||||
|
while(True):
|
||||||
|
|
||||||
|
#Get all values from all indices to find min
|
||||||
|
value = None
|
||||||
|
values = list()
|
||||||
|
for i in range(num_indices):
|
||||||
|
if pointers[i] < parital_index_indices[i]['length']:
|
||||||
|
values.append(parital_index_indices[i]['index'][pointers[i]][0])
|
||||||
|
|
||||||
|
if(len(values) == 0):
|
||||||
|
break
|
||||||
|
value = min(values)
|
||||||
|
|
||||||
|
#Get data from the min value of all indices if exists then save to mergedIndex
|
||||||
|
if value == None:
|
||||||
|
print("I have crashed some how by not getting min value")
|
||||||
|
break
|
||||||
|
|
||||||
|
node = Node()
|
||||||
|
node.index_value = value
|
||||||
|
for i in range(num_indices):
|
||||||
|
if pointers[i] < parital_index_indices[i]['length'] and parital_index_indices[i]['index'][pointers[i]][0] == value:
|
||||||
|
to_seek = parital_index_indices[i]['index'][pointers[i]][1]
|
||||||
|
partial_files[i].seek(to_seek,0)
|
||||||
|
json_value = partial_files[i].readline()
|
||||||
|
temp_node = json.loads(json_value)
|
||||||
|
node.postings = node.postings + temp_node['postings']
|
||||||
|
pointers[i] = pointers[i] + 1
|
||||||
|
|
||||||
|
node.postings.sort(key=lambda y:y['doc_id'])
|
||||||
|
full_index.index.append((value,merged_index.tell()))
|
||||||
|
full_index.length = full_index.length + 1
|
||||||
|
jsonStr = json.dumps(node,default=lambda o: o.__dict__,sort_keys=False)
|
||||||
|
merged_index.write(jsonStr + '\n')
|
||||||
|
|
||||||
|
full_index.index.sort(key=lambda y:y[0])
|
||||||
|
jsonStr =json.dumps(full_index, default=lambda o: o.__dict__,sort_keys=False)
|
||||||
|
with open("merged_index.index" ,'w') as f:
|
||||||
|
f.write(jsonStr)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
indexer = Indexer(True,0)
|
indexer = Indexer(True,list(),list(),list())
|
||||||
indexer.get_data()
|
indexer.get_data_path()
|
||||||
|
indexer.start()
|
||||||
|
indexer.merge()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
0
merged_index.full
Normal file
0
merged_index.full
Normal file
@ -1,11 +1,16 @@
|
|||||||
#Posting class for indexer, will probably be more complex as we keep adding crap to it
|
#Posting class for indexer, will probably be more complex as we keep adding crap to it
|
||||||
|
|
||||||
class Posting():
|
class Posting():
|
||||||
def __init__(self,doc_id,tf_raw,tf_idf,positionals):
|
def __init__(self,doc_id,url,tf_raw,tf_idf,positionals):
|
||||||
self.doc_id = doc_id
|
self.doc_id = doc_id
|
||||||
|
self.url = url
|
||||||
self.tf_raw = tf_raw
|
self.tf_raw = tf_raw
|
||||||
self.tf_idf = tf_idf
|
self.tf_idf = tf_idf
|
||||||
self.positionals = positionals
|
self.positionals = positionals
|
||||||
|
def __repr__(self):
|
||||||
|
return "Doc_id:" + str(self.doc_id) + " URL:" + self.url + " tf_raw:" + str(self.tf_raw) + " tf_idf:" + str(self.tf_idf) + " positionals:" + str(self.positionals)
|
||||||
|
def __str__(self):
|
||||||
|
return "Doc_id:" + str(self.doc_id) + " URL:" + self.url + " tf_raw:" + str(self.tf_raw) + " tf_idf:" + str(self.tf_idf) + " positionals:" + str(self.positionals)
|
||||||
|
|
||||||
def comparator(self):
|
def comparator(self):
|
||||||
#Some custom comparator for sorting postings later
|
#Some custom comparator for sorting postings later
|
||||||
|
156
test.py
156
test.py
@ -1,115 +1,59 @@
|
|||||||
|
from threading import Thread
|
||||||
import json
|
import json
|
||||||
from posting import Posting
|
import os
|
||||||
import math
|
import shelve
|
||||||
import sys
|
import sys
|
||||||
import random
|
from bs4 import BeautifulSoup
|
||||||
from nltk.corpus import words
|
from time import perf_counter
|
||||||
random_list = [1,2,3,4,5,6,7,8,9,10]
|
from nltk.stem import PorterStemmer
|
||||||
|
import nltk
|
||||||
|
import time
|
||||||
|
from posting import Posting
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
test_data = words.words()
|
self_index = dict()
|
||||||
random.shuffle(test_data)
|
stemmer = PorterStemmer()
|
||||||
|
target = 'data/DEV/aiclub_ics_uci_edu/8ef6d99d9f9264fc84514cdd2e680d35843785310331e1db4bbd06dd2b8eda9b.json'
|
||||||
|
file_load = open(target)
|
||||||
|
data = json.load(file_load)
|
||||||
|
doc_id = target[target.rfind('/')+1:-5]
|
||||||
|
url = data['url']
|
||||||
|
soup = BeautifulSoup(data["content"],features="lxml")
|
||||||
|
# Gets a cleaner version of the text compared to soup.get_text()
|
||||||
|
clean_text = ' '.join(soup.stripped_strings)
|
||||||
|
# Looks for large white space, tabbed space, and other forms of spacing and removes it
|
||||||
|
# The regex matches a whitespace character followed by a character that is neither a space nor a word character
|
||||||
|
clean_text = re.sub(r'\s[^ \w]', '', clean_text)
|
||||||
|
# Tokenizes the text and joins it back into a single string. Keeping it as a single string is essential for get_tf_idf to work as intended
|
||||||
|
clean_text = " ".join([i for i in clean_text.split() if i != "" and re.fullmatch('[A-Za-z0-9]+', i)])
|
||||||
|
# Stems tokenized text
|
||||||
|
clean_text = " ".join([stemmer.stem(i) for i in clean_text.split()])
|
||||||
|
|
||||||
class Node():
|
tokens = nltk.word_tokenize(clean_text)
|
||||||
index_value = ''
|
|
||||||
postings = list()
|
|
||||||
|
|
||||||
class Index():
|
#counter(count,positionals)
|
||||||
length = 0
|
|
||||||
index = list()
|
|
||||||
|
|
||||||
def random_posting(id):
|
counter = dict()
|
||||||
return Posting(id,random.choice(random_list),random.choice(random_list),[random.choice(random_list),random.choice(random_list),random.choice(random_list),random.choice(random_list),
|
for i in range(len(tokens)):
|
||||||
random.choice(random_list),random.choice(random_list),random.choice(random_list),random.choice(random_list)])
|
word = tokens[i]
|
||||||
|
if word in counter:
|
||||||
|
counter[word][0] = counter[word][0] + 1
|
||||||
|
counter[word][1].append(i)
|
||||||
|
else:
|
||||||
|
counter[word] = [1,list()]
|
||||||
|
counter[word][1].append(i)
|
||||||
|
print(counter)
|
||||||
|
doc_length = len(tokens)
|
||||||
|
for index in counter:
|
||||||
|
if index in self_index:
|
||||||
|
postings = self_index[index]
|
||||||
|
postings.append(Posting(doc_id,url,counter[index][0]/doc_length,0,counter[index][1]))
|
||||||
|
else:
|
||||||
|
self_index[index] = list()
|
||||||
|
self_index[index].append(Posting(doc_id,url,counter[index][0]/doc_length,0,counter[index][1]))
|
||||||
|
|
||||||
def random_partial_index(name):
|
for index in self_index:
|
||||||
part_index = Index()
|
print(index + str(self_index[index]) + '\n')
|
||||||
part_index.index = list()
|
|
||||||
part_index.length = 0
|
|
||||||
with open(name +'.partial', 'w') as f:
|
|
||||||
for i in range(1000):
|
|
||||||
|
|
||||||
node1 = Node()
|
print("The size of the dictionary is {} bytes".format(sys.getsizeof(self_index)))
|
||||||
node1.index_value = random.choice(test_data).lower()
|
|
||||||
node1.postings = list()
|
|
||||||
for i in range(10):
|
|
||||||
node1.postings.append(random_posting(i))
|
|
||||||
|
|
||||||
jsonStr = json.dumps(node1, default=lambda o: o.__dict__,sort_keys=False)
|
|
||||||
|
|
||||||
part_index.index.append((node1.index_value,f.tell()))
|
|
||||||
f.write(jsonStr + '\n')
|
|
||||||
part_index.length = part_index.length + 1
|
|
||||||
|
|
||||||
part_index.index.sort(key=lambda y:y[0])
|
|
||||||
jsonStr =json.dumps(part_index, default=lambda o: o.__dict__,sort_keys=False)
|
|
||||||
with open(name + '.index','w') as f:
|
|
||||||
f.write(jsonStr)
|
|
||||||
|
|
||||||
def merge(partial_indices):
|
|
||||||
partial_files = list()
|
|
||||||
partial_index_files = list()
|
|
||||||
parital_index_indices = list()
|
|
||||||
merged_index = open("merged_index.full",'w')
|
|
||||||
num_indices = len(partial_indices)
|
|
||||||
|
|
||||||
#Full Index.Index and Length
|
|
||||||
full_index = Index()
|
|
||||||
full_index.index = list()
|
|
||||||
full_index.length = 0
|
|
||||||
|
|
||||||
for partial_index in partial_indices:
|
|
||||||
file = open(partial_index+'.partial','r')
|
|
||||||
partial_files.append(file)
|
|
||||||
index = open(partial_index+'.index','r')
|
|
||||||
partial_index_files.append(index)
|
|
||||||
|
|
||||||
for partial_index_file in partial_index_files:
|
|
||||||
partial_index_file.seek(0,0)
|
|
||||||
parital_index_indices.append(json.loads(partial_index_file.readline()))
|
|
||||||
|
|
||||||
#Start all indexes at 0
|
|
||||||
for partial_file in partial_files:
|
|
||||||
partial_file.seek(0,0)
|
|
||||||
|
|
||||||
pointers = [0]*num_indices
|
|
||||||
|
|
||||||
while(True):
|
|
||||||
|
|
||||||
#Get all values from all indices to find min
|
|
||||||
value = None
|
|
||||||
values = list()
|
|
||||||
for i in range(num_indices):
|
|
||||||
if pointers[i] < parital_index_indices[i]['length']:
|
|
||||||
values.append(parital_index_indices[i]['index'][pointers[i]][0])
|
|
||||||
|
|
||||||
if(len(values) == 0):
|
|
||||||
break
|
|
||||||
value = min(values)
|
|
||||||
|
|
||||||
#Get data from the min value of all indices if exists then save to mergedIndex
|
|
||||||
if value == None:
|
|
||||||
print("I have crashed some how by not getting min value")
|
|
||||||
break
|
|
||||||
|
|
||||||
node = Node()
|
|
||||||
node.index_value = value
|
|
||||||
for i in range(num_indices):
|
|
||||||
if pointers[i] < parital_index_indices[i]['length'] and parital_index_indices[i]['index'][pointers[i]][0] == value:
|
|
||||||
to_seek = parital_index_indices[i]['index'][pointers[i]][1]
|
|
||||||
partial_files[i].seek(to_seek,0)
|
|
||||||
json_value = partial_files[i].readline()
|
|
||||||
temp_node = json.loads(json_value)
|
|
||||||
node.postings = node.postings + temp_node['postings']
|
|
||||||
pointers[i] = pointers[i] + 1
|
|
||||||
|
|
||||||
node.postings.sort(key=lambda y:y['doc_id'])
|
|
||||||
full_index.index.append((value,merged_index.tell()))
|
|
||||||
full_index.length = full_index.length + 1
|
|
||||||
jsonStr = json.dumps(node,default=lambda o: o.__dict__,sort_keys=False)
|
|
||||||
merged_index.write(jsonStr + '\n')
|
|
||||||
|
|
||||||
full_index.index.sort(key=lambda y:y[0])
|
|
||||||
jsonStr =json.dumps(full_index, default=lambda o: o.__dict__,sort_keys=False)
|
|
||||||
with open("merged_index.index" ,'w') as f:
|
|
||||||
f.write(jsonStr)
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1 @@
|
|||||||
|
{"url": "http://alderis.ics.uci.edu/links.html", "content": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">\n<head>\n<title>Alderis@UCI - Analysis Language for Distributed, Embedded, and Real-time Systems</title>\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\n<link rel=\"stylesheet\" href=\"css/1.css\" type=\"text/css\" media=\"screen,projection\" />\n\n</head>\n \n<body>\n\n\t\t<div id=\"container\">\n\t\t\n\t\t\t\t<div id=\"header\">\n\t\t\t\t\n\t\t\t\t\t\t<h1>Alderis@UCI</a></h1>\n\t\t\t\t\t\t<h3>Analysis Language for Distributed, Embedded, and Real-time Systems.</h3>\n\t\t\t\t\n\t\t\t\t</div>\n\t\t\t\t\n\t\t\t\t<ul id=\"nav\">\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t<li><a href=\"downloads.html\">Downloads</a></li>\n\t\t\t\t\t\t\t\t<li><a href=\"publications.html\">Publications</a></li>\n\t\t\t\t\t\t\t\t<li><a href=\"dresystems.html\">DRE Systems</a></li>\n\t\t\t\t\t\t\t\t<li><a href=\"amba2.html\">AMBA</a></li>\n\t\t\t\t\t\t\t\t<li><a href=\"#\" class=\"active\">Links</a></li>\n\t\t\t\t\t\t\t\t<li><a href=\"index.html\">Home</a></li>\n\t\t\t\t\t\t\t\t\n\t\t\t\t</ul>\n\t\t\t\t\n\t\t\t\t<br class=\"clear\" />\n\t\t\t\t\n\t\t\t\t<div id=\"sidebar\">\n\t\t\t\t\n\t\t\t\t\t\t<h1>Description</h1>\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t<p>You can find some pointers to related research groups here...</p>\n\t\t\t\t\t\t\n\t\t\t\t\t\t<div class=\"sidebarfooter\">\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t<a href=\"http://dre.sourceforge.net\">DREAM</a>\n\t\t\t\t\t\t\t\t<a href=\"http://www.cecs.uci.edu/~aces\">ACES</a>\n\t\t\t\t\t\t\t\t<a href=\"http://www.ics.uci.edu/~forge\">FORGE</a>\n\t\t\t\t\t\t\n\t\t\t\t\t\t</div>\n\t\t\t\t\t\t\n\t\t\t\t\t\t<div id=\"sidebar_bottom\"></div>\n\t\t\t\t\n\t\t\t\t</div>\n\t\t\t\t\n\t\t\t\t<div 
id=\"content\">\n\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\t<h1>Links</h1>\n\n\t\t\t\t\t\t<p>\n\t\t\t\t\t\t<ul>\n\t\t\t\t\t\t\t<li>The Alderis project is hosted at the <a href=\"http://www.cecs.uci.edu\">Center for Embedded Computer Systems</a>, <a href=\"http://www.uci.edu\">University of California, Irvine</a>, under the supervision of <a href=\"http://www.ics.uci.edu/~dutt/\">Nikil Dutt</a>.</li>\n\t\t\t\t\t\t\t<li>We are also collaborating with researchers from the <a href=\"http://www.ics.uci.edu/~dsm\">Distributed Systems Middleware Group</a> and the <a href=\"http://doc.ece.uci.edu\">DOC group</a> at UCI, and from <a href=\"http://www.isis.vanderbilt.edu\">ISIS</a> at Vanderbilt University.</li>\n\t\t\t\t\t\t\t<li>The Alderis language can be analyzed using the <a href=\"http://dre.sourceforge.net\">open-source DREAM ™ tool</a>.</li>\n\t\t\t\t\t\t\t<li>DREAM ™ uses the <a href=\"http://www.uppaal.com\">UPPAAL</a> model checker and the <a href=\"http://www-verimag.imag.fr/~async/IF/\">Verimag IF toolset</a> for formal verification.</li>\n\t\t\t\t\t\t\t<li>The original design of the website has been created by <a href=\"http://www.jameskoster.co.uk\">JayKay</a>.</li>\n\t\t\t\t\t\t</ul>\n\t\t\t\t\t\t</p>\n\n\t\t\t\t</div>\n\t\t\n\t\t</div>\n\t\t\n\t\t<div id=\"footer\">\n\t\t\n\t\t\t\t<p>\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t© All the material published on this website is copyrighted. All rights reserved. <a href=\"http://www.ipstat.com/cgi-bin/stats?name=alderis\"><img src=\"http://www.ipstat.com/cgi-bin/ipstat?name=alderis\" height=\"5\" width=\"5\" border=\"0\"></a>\n\n\t\t\t\t\n\t\t\t\t\t\t</p>\n\t\t\n\t\t</div>\n\n\n</body>\n</html>\n", "encoding": "ascii"}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -0,0 +1 @@
|
|||||||
|
{"url": "http://alderis.ics.uci.edu/downloads.html", "content": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\">\n<head>\n<title>Alderis@UCI - Analysis Language for Distributed, Embedded, and Real-time Systems</title>\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\n<link rel=\"stylesheet\" href=\"css/1.css\" type=\"text/css\" media=\"screen,projection\" />\n\n</head>\n \n<body>\n\n\t\t<div id=\"container\">\n\t\t\n\t\t\t\t<div id=\"header\">\n\t\t\t\t\n\t\t\t\t\t\t<h1>Alderis@UCI</a></h1>\n\t\t\t\t\t\t<h3>Analysis Language for Distributed, Embedded, and Real-time Systems.</h3>\n\t\t\t\t\n\t\t\t\t</div>\n\t\t\t\t\n\t\t\t\t<ul id=\"nav\">\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t<li><a href=\"#\" class=\"active\">Downloads</a></li>\n\t\t\t\t\t\t\t\t<li><a href=\"publications.html\">Publications</a></li>\n\t\t\t\t\t\t\t\t<li><a href=\"dresystems.html\">DRE Systems</a></li>\n\t\t\t\t\t\t\t\t<li><a href=\"amba2.html\">AMBA</a></li>\n\t\t\t\t\t\t\t\t<li><a href=\"links.html\">Links</a></li>\n\t\t\t\t\t\t\t\t<li><a href=\"index.html\">Home</a></li>\n\t\t\t\t\t\t\t\t\n\t\t\t\t</ul>\n\t\t\t\t\n\t\t\t\t<br class=\"clear\" />\n\t\t\t\t\n\t\t\t\t<div id=\"sidebar\">\n\t\t\t\t\n\t\t\t\t\t\t<h1>Feedback</h1>\n\t\t\t\t\t\t\n\t\t\t\t\t\t<p>We need your feedback to further develop the analysis and verification methods. 
If you find our tools useful or if you have comments / questions regarding the Alderis/DREAM ™ framework please contact me at gabe at uci dot edu.</p>\n\t\t\t\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t<div class=\"sidebarfooter\">\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t<a href=\"http://dre.sourceforge.net\">DREAM</a>\n\t\t\t\t\t\t\t\t<a href=\"http://www.cecs.uci.edu/~aces\">ACES</a>\n\t\t\t\t\t\t\t\t<a href=\"http://www.ics.uci.edu/~forge\">FORGE</a>\n\t\t\t\t\t\t\n\t\t\t\t\t\t</div>\n\t\t\t\t\t\t\n\t\t\t\t\t\t<div id=\"sidebar_bottom\"></div>\n\t\t\t\t\n\t\t\t\t</div>\n\t\t\t\t\n\t\t\t\t<div id=\"content\">\n\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\n\t\t\t\t\t\t<h1>Downloads</h1>\n\n\t\t\t\t\t\t<li><img src=\"pics/new.png\"/>Download the recently released ALDERIS modeling language: <a href=\"files/ALDERIS_1.0.zip\">ALDERIS_1.0.zip</a></li>\n\n\t\t\t\t\t\t<li><img src=\"pics/new.png\"/>Download the examples created using the ALDERIS modeling language: <a href=\"files/ALDERIS_Examples_1.0.zip\">ALDERIS_Examples_1.0.zip</a>. The examples include small to large-scale Boeing Bold Stroke models as well as a tiny helicopter autopilot case study.</li>\n\n\t\t\t\t\t\t<li>To download the open-source DREAM analysis tool please visit the sourceforge website: <a href=\"http://dre.sourceforge.net\">http://dre.sourceforge.net</a></li>\n\n\t\t\t\t\t\t<li>To download the recently open-sourced Generic Modeling Environment (GME) please visit the ISIS GME website: <a href=\"http://www.isis.vanderbilt.edu/projects/gme\">http://www.isis.vanderbilt.edu/projects/gme</a></li>\n\n\t\t\t\t</div>\n\t\t\n\t\t</div>\n\t\t\n\t\t<div id=\"footer\">\n\t\t\n\t\t\t\t<p>\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t© All the material published on this website is copyrighted. All rights reserved. 
<a href=\"http://www.ipstat.com/cgi-bin/stats?name=alderis\"><img src=\"http://www.ipstat.com/cgi-bin/ipstat?name=alderis\" height=\"5\" width=\"5\" border=\"0\"></a>\n\n\t\t\t\t\n\t\t\t\t\t\t</p>\n\t\t\n\t\t</div>\n\n\n</body>\n</html>\n", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=journal", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\nfile not found", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/hilario.html", "content": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n<style>\np.margin\n{\nmargin-top:-1cm;\nmargin-bottom:2cm;\nmargin-right:2cm;\nmargin-left:5cm;\n}\n</style>\n<title>PaulHilarioArtist</title>\n</head>\n<p class=\"margin\">\n\n<p>\n<font face=\"tahoma\" color=\"navy\" size=\"5\">\nRecent work of Paul Hilario<br/>\n</font>\n<font face=\"tahoma\" color=\"navy\" size=\"3\">\nLos Banos, Laguna <br/>\nPhilippines <br/>\n</font>\n</p>\n\n</font>\n<font face=\"tahoma\" color=\"navy\" size=\"3\">\nPaul's paintings and their descriptions (in his own words ...)\n</font>\n<br/>\n\n<p>\n<img src=\"Bugaw.jpg\" height=\"350\" width=\"600\" align=\"left\" hspace=20>\n<font face=\"tahoma\" color =\"maroon\" size=\"4\">\nBUGAW (Shoo!)\n</font>\n<br/>\n<font face=\"tahoma\" color=\"navy\" size=\"3\">\nThis painting is about at least two subjects. One subject is about the environment. Notice the slingshot on the boy's pocket? \nHe decided not to hurt or kill any of the birds.\nAlso, if you can feel the way I feel about the painting, don't you get the impression that they are rejoicing? \nThe two kid's raised arms are in celebration of a forthcoming bountiful rice harvest. The fields are heavily filled with golden rice.\n</font>\n</p>\n<BR clear=ALL />\n\n\n<br/>\n<br/>\n<p>\n<img src=\"Salat_Nguni't_Sapat.jpg\" height=\"375\" width=\"600\" align=\"left\" hspace=50>\n<font face=\"tahoma\" color =\"maroon\" size=\"3\">\nSalat Nguni't Sapat (Not Enough but Enough)\n</font>\n<br/>\n<font face=\"tahoma\" color=\"navy\" size=\"3\">\nMany children in rural areas in the Philippines walk miles just to get to school. 
<br/>\nThey wear ragged clothes, worn out and broken footwear and have barely enough money for food and school supplies. \nSome classes have more than 40 students so everybody has to share tables and chairs and the room is cramped. \nBut surprisingly so they feel happy that they have the opportunity to go to school while many don't have that chance at all. \nThis is an example of a trait of Filipinos - to always look at the brighter side of things no matter how trying times can be.\n</font>\n</p>\n<BR clear=ALL />\n\n<br/>\n<br/>\n\n<font face=\"tahoma\" color =\"maroon\" size=\"4\">\nAbout Paul \n</font>\n<br/>\n<font face=\"tahoma\" color =\"navy\" size=\"3\">\nPaul is an up-and-coming artist in the Philippines.\nHe has been making art as young as 4 years old and at that early age he knew that he wanted to be an artist. \nIt is only recently, however, that he decided \nto give art a serious professional chance as a career. He creates art from 3:00 am to 6:30 am and transforms into a museum curator from 8 am to 5 pm weekdays. \nPaul has developed his own signature style of art, and relishes in creating not just for the viewer's eyes but to also stimulate other senses and emotions as well. \nHis subjects can be drawn from anything around him but most of his works are taken from cultural and environmental themes, with both sometimes curiously intersecting in one canvas. \nHe subtlety includes hidden messages, stories, and lessons in the paintings. Paul prefers to use a lot of bright colours and high contrast. \nHis style is eclectic. He mix and matches impressionism with pop, low brow, cubism and fauvism. His work is generally identified between impressionism, pop and naif art. \nHe considers Vincent Van Gogh and Filipino painter Marcel Antonio as his art influences. \n</font>\n\n<body>\n</body>\n</html>\n", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=projects", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\nfile not found", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=grants", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\n\n<h2>Active Grants</h2>\n\n<ul>\n<li>Principal Investigator, NSF Division of Social and Economic Sciences (2011-2014), Models and Methods for Non-stationary Behavioral Time Series</li>\n</ul>\n<ul>\n<li>Principal Investigator, NSF Division Mathematical Sciences (2011-2014), Applied Probability and Time Series Modeling</li>\n</ul>\n<ul>\n<li>Principal Investigator, NSF Division of Mathematical Sciences (2012), Developing Novel Statistical Methods in NeuroImaging (workshop)</li>\n</ul>\n\n\n<h2>Selected Completed Grants</h2>\n\n<ul>\n<li>Principal Investigator, NSF Division of Mathematical Sciences, Collaborative Research: Spectral and Connectivity Analysis of Non-Stationary Spatio-Temporal Data </li>\n</ul>\n<ul>\n<li>Co-Principal Investigator, NSF Brain and Cognition Sciences (PI: J. Sanes, Brown Univ) (2009-2011), Motor Intention </li>\n</ul>\n<ul>\n<li>Co-Investigator, NIMH (PI: S. Haber, Univ Rochester), Underlying behavioral effects and mechanisms of DBS in OCD </li>\n</ul>\n<ul>\n<li>Co-Investigator, NIMH (PI: Dickstein, Bradley Hospital) (2009-2011), Bio-behavioral Markers of Bipolar Conversion </li>\n</ul>\n<ul>\n<li>Co-Investigator, NIH (PI: Rakowski, Brown) (2009-2011), Classification Tree Analysis to Enhance Targeting for Cancer Screening Programs</li>\n</ul>\n<ul>\n<li>Principal Investigator, NSF Division of Mathematical Sciences (2006-2008), Collaborative Research: Time Series in Experimental Designs </li>\n</ul>\n<ul>\n<li>Principal Investigator, NSF Division of Mathematical Sciences (2004-2008), Localized Cross Spectral Analysis and Pattern Recognition in Non-Stationary Signals </li>\n</ul>\n<ul>\n<li>Co-Principal Investigator, NSF Division of Mathematical Sciences (PI: Stoffer, Pittsburgh) (2001-2004), Frequency Domain Methods in Time Series Analysis</li>\n</ul>\n<ul>\n<li>Principal Investigator (subcontract from UPenn) (2000-2004), NIMH RO1, Automatic Statistical Time-Frequency Analysis 
</li>\n</ul>\n\n\n\r\n</div>\r\n\r\n\r\n<!-- section ends here -->\r\n\r\n\r\n</body>\r\n</html>\r\n", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=news", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\n\n<h2>2012</h2>\n\n<ul>\n<li>Mark Fiecas received his PhD degree in Biostatistics from Brown University. His research was on connectivity measures in brain imaging. Mark is now a post-doctoral scholar in the Department of Psychiatry at UC-San Diego.</li>\n</ul>\n<ul>\n<li>Cristina Gorrostieta received her PhD degree in Biostatistics from Brown University. Her dissertation research was on complex dependence measures in multivariate time series analysis.</li>\n</ul>\n<ul>\n<li>NSF will fund the workshop <a href=\"http://www.ics.uci.edu/~hombao/neurostatsw2012.html\">Developing Novel Statistical Methods for Neuroimaging</a> which will be held in San Diego before JSM 2012. The PI is H. Ombao and co-organizers are Martin Lindquist (Columbia) and Wesley Thompson (UC San Diego). </li>\n</ul>\n<ul>\n<li>Collaborator Giovanni Motta (Maastricht Univ) visited UC-Irvine from Jan - March 2012. </li>\n</ul>\n<ul>\n<li>Hernando Ombao started his new position as Associate Professor in the Department of Statistics at UC-Irvine. </li>\n</ul>\n\n<h2>2011</h2>\n\n<ul>\n<li>Hakmook Kang received his PhD degree in Biostatistics from Brown University. His research was on Spatio-Spectral Analysis of functional magnetic resonance imaging data. Dr. Kang is now Assistant Professor of Biostatistics at Vanderbilt University. </li>\n</ul>\n<ul>\n<li>Devin Koestler won the Student Paper Award which was presented at the ENAR conference in Miami, FL. His paper, co-authored with H. Ombao, was on forecasting census counts using both seasonal and patients' clinical data. </li>\n</ul>\n<ul>\n<li>Hakmook Kang is the 2011 winner of the John van Ryzin Award for Best Paper. The award was presented at the ENAR conference in Miami, FL.</li>\n</ul>\n<ul>\n<li>Dan van Lunen received his ScB degree in Applied Mathematics from Brown University. Dan wrote a thesis on online change-point detection under the direction of H. 
Ombao.</li>\n</ul>\n\n<h2>2010</h2>\n\n<ul>\n<li>Mark Fiecas received one of the Student Awards for his paper on generalized shrinkage for estimating partial coherence. The award was presented at the New England Statistics Symposium at Harvard University.</li>\n</ul>\n<ul>\n<li>Dan van Lunen, undergraduate student in Applied Mathematics, received the summer research training award to compare diffusion tensor imaging data recorded under different scanning protocols. </li>\n</ul>\n\r\n</div>\r\n\r\n\r\n<!-- section ends here -->\r\n\r\n\r\n</body>\r\n</html>\r\n", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/instat.html", "content": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n<style>\np.margin\n{\nmargin-top:-1cm;\nmargin-bottom:2cm;\nmargin-right:2cm;\nmargin-left:5cm;\n}\n</style>\n<title>INSTAT Lectures</title>\n</head>\n<p class=\"margin\">\n<p>\n<img src=\"upseal.png\" height=\"150\" align=\"left\" hspace=20 >\n<font face=\"tahoma\" color =\"black\" size=\"4\">\n<br/>\n<br/>\nHernando Ombao, Ph.D. \n</font>\n<br/>\n<font face=\"tahoma\" color=\"navy\" size=\"3\">\nVisiting Scholar <br/>\nInstitute of Statistics <br/>\nUniversity of the Philippines at Los Banos<br/> \n</font>\n</p>\n<br/>\n<br/>\n\n<font face=\"tahoma\" color = \"maroon\" size=\"4\">\n<p>\nPRESENTATIONS\n</p>\n</font>\n<font face=\"tahoma\" color = \"navy\" size=\"3\">\n<ul>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Ombao_UPLB_INSTAT_Nov_2012.pdf\">Seminar at INSTAT: Modeling Dependence in Multivariate Time Series</a></li>\n<br/>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Ombao_UPLB_IBS_Dec_2012.pdf\">Seminar at the IBS: Time Series and Applications to Biological Data</a></li>\n<br/>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Ombao_UPManila_Dec_2012.pdf\">Seminar at Inst Clinical Epidemiology UP Manila: Time Series and Applications to Health Data</a>\n</li>\n<br/>\n<li>Overview: Time Domain Analysis</li>\n<ul>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Day1-Overview.pdf\">Slides</a></li>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Day1-Lec.pdf\">Notes</a></li>\n<li><a href=\"http://www.ics.uci.edu/~hombao/R-Computing-TIMEDOMAIN.txt\">Codes</a></li>\n</ul>\n<br/>\n<li>Overview: Spectral Domain Analysis</li>\n<ul>\n<li>Slides</li>\n<li>Notes</li>\n<li>Computer Codes</li>\n</ul>\n</ul>\n</font>\n\n<font face=\"tahoma\" color = \"maroon\" 
size=\"4\">\n<p>\nSELECTED PAPERS\n</p>\n</font>\n<font face=\"tahoma\" color = \"navy\" size=\"3\">\n<ul>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Downloads/JASA2001.pdf\">Automatic Analysis of Bivariate Non-Stationary Time Series</a></li>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Downloads/JASA-2004-SLEX-Disc.pdf\">Classification and Discrimination of Non-Stationary Time Series</a></li>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Downloads/MultiSLEX-JASA2005.pdf\">Multivariate SLEX Analysis</a></li>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Downloads/IEEE-EvolCoh-FINAL.pdf\">Evolutionary Coherence Analysis</a></li>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Downloads/AOAS_GeneralizedShrinkage-FINAL.pdf\">Generalized Shrinkage Estimator for the Partial Coherence Matrix</a></li>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Downloads/BookChapter-OMBAO-SLEX-2012.PDF\">Book Chapter on SLEX Methods</a></li>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Downloads/Monkey-LFP-Gorrostieta-Ombao-JTSA-2012.pdf\">Models for Cross-Oscillatory Dependence</a></li>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Downloads/ME-VAR-NEUROIMAGE-FINAL.PDF\">Mixed Effects Vector Auto-regressive Model</a></li>\n<li><a href=\"http://www.ics.uci.edu/~hombao/Downloads/SpatioSpectral_JASA2012.pdf\">Spatio-Spectral Mixed Effects Models</a></li>\n</ul>\n\n<body>\n</body>\n</html>\n", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=upcoming", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\n\n<h2>2012+</h2>\n\n<ul>\n<li>Joint Statistical Meetings 2012, San Diego CA</li>\n</ul>\n<ul>\n<li>Workshop on Developing Novel Statistical Methods in Neuroimaging, San Diego CA</li>\n</ul>\n<ul>\n<li>Workshop on Statistical Learning and Data Mining, Ann Arbor MI</li>\n</ul>\n<ul>\n<li>International Chinese Statistical Association Conference, Boston MA</li>\n</ul>\n<ul>\n<li>Institut de statistique 20th anniversary, Universite catholique de Louvain, Belgium</li>\n</ul>\n\n\r\n</div>\r\n\r\n\r\n<!-- section ends here -->\r\n\r\n\r\n</body>\r\n</html>\r\n", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/hernando.html", "content": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n<style>\np.margin\n{\nmargin-top:-1cm;\nmargin-bottom:2cm;\nmargin-right:2cm;\nmargin-left:5cm;\n}\n</style>\n<title>Hernando Ombao UCIrvine</title>\n</head>\n<p class=\"margin\">\n<p>\n<img src=\"HO-Solo-Painting.jpg\" height=\"180\" align=\"left\" hspace=20 >\n<font face=\"tahoma\" color =\"black\" size=\"4\">\nHernando Ombao, Ph.D. \n</font>\n<br/>\n<font face=\"tahoma\" color=\"navy\" size=\"2\">\nProfessor <br/>\nDepartment of Statistics <br/>\nUniversity of California at Irvine <br/> \nBren Hall, Room 2206 <br />\nIrvine, CA 92697 USA<br/>\nPhone: (949) 824-5679 <br/>\nEmail: hombao AT uci DOT edu <br/>\n</font>\n<font face=\"tahoma\" color = \"red\" size=\"4\">\n<a href=\"http://www.ics.uci.edu/~hombao/Ombao-CV.pdf\">CV</a>\n</font>\n\n<br/>\n<br/>\n\n<font face=\"tahoma\" color = \"maroon\" size=\"4\">\n<p>\nRESEARCH AREAS\n</p>\n</font>\n<font face=\"tahoma\" color = \"navy\" size=\"3\">\n<ul>\n<li>Time Series Analysis</li>\n<li>Spatio-temporal modelling</li>\n<li>Statistical Learning</li>\n<li>Applications to Brain Science (fMRI, EEG, MEG, EROS)</li>\n</ul>\n</font>\n\n\n\n<font face=\"tahoma\" color = \"maroon\" size=\"4\">\n<p>\nRESEARCH GROUPS \n</p>\n</font>\n<p>\n\n\n<font face=\"tahoma\" color = \"maroon\" size=\"4\">\n<ul>\n<li><a href=\"http://ucispacetime.wix.com/spacetime\">\nSpace-Time Modeling at UC-Irvine</a>\n</li>\n<li><a href=\"http://www.ics.uci.edu/~babaks/CIMR/Home.html\">\nComputational Research in Neuroscience </a>\n</li>\n<li> <a href=\"http://www.ics.uci.edu/~hombao/neurostatsw2012.html\"> \nRecent NSF-Funded Workshop on Statistics in NeuroImaging</a> \n<br/>\n<font face=\"tahoma\" color = \"navy\" size=\"2\"> \n24 
July to 26 July 2012 at San Diego, CA <br/>\n</font>\n<font face=\"tahoma\" color = \"navy\" size=\"2\"> \nCo-orgznized with M. Lindquist (Johns Hopkins) \nand W. Thompson (UCSD) <br/>\n</font> \n</p>\n</font> \n</ul>\n\n<p>\n<font face=\"tahoma\" color = \"maroon\" size=\"4\">\nI Support <a href=\"http://www.ics.uci.edu/~hombao/hilario.html\">\nup-and-coming artists\n</a>\n</p>\n<body>\n</body>\n</html>\n", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=research", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\nfile not found", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=recent", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\n\n<h2>2013</h2>\n<ul>\n<li>International Chinese Statistical Association, Hong Kong</li>\n</ul>\n<ul>\n<li>American Mathematical Society Workshop, Riverside CA</li>\n</ul>\n<ul>\n<li>ENAR Conference, Orlando FL</li>\n</ul>\n<ul>\n<li>Department of Mathematics, Lancaster University, UK</li>\n</ul>\n\n\n<h2>2012</h2>\n<ul>\n<li><a href=\"http://www.ics.uci.edu/~hombao/instat.html\">Institute of Statistics, University of the Philippines at Los Banos</a></li>\n</ul>\n<ul>\n<li>Institute for Stochastics, Karlsruhe Institute of Technology, Germany</li>\n</ul>\n<ul>\n<li>Department of Mathematics and Statistics, San Diego State University</li>\n</ul>\n<ul>\n<li>Department of Applied Mathematics and Statistics, Univ California at Santa Cruz</li>\n</ul>\n<ul>\n<li>Department of Statistics, Univ California at Riverside</li>\n</ul>\n<ul>\n<li>ENAR Conference, Washington DC</li>\n</ul>\n\n<h2>2011</h2>\n<ul>\n<li>Department of Mathematics, Pomona College</li>\n</ul>\n<ul>\n<li>Department of Statistics, University of Virginia</li>\n</ul>\n<ul>\n<li>Joint Statistical Meetings, Miami, FL</li>\n</ul>\n<ul>\n<li>Department of Statistics, University of Warwick, UK</li>\n</ul>\n<ul>\n<li>Department of Mathematics, Lancaster University, UK</li>\n</ul>\n<ul>\n<li>Department of Mathematics, Bristol University, UK</li>\n</ul>\n<ul>\n<li>International Chinese Statistical Association Conference, New York</li>\n</ul>\n<ul>\n<li>Department of Statistics, University College London, UK</li>\n</ul>\n<ul>\n<li>ENAR Conference, Miami, FL </li>\n</ul>\n<ul>\n<li>Division of Biostatistics, University of Minnesota</li>\n</ul>\n<ul>\n<li>Centro de Investigaciones en Matematicas, Guanajuato, Mexico</li>\n</ul>\n<ul>\n<li>Department of Biostatistics, Univ North Carolina</li>\n</ul>\n<ul>\n<li>Department of Statistics, Univ California at Irvine</li>\n</ul>\n<ul>\n<li>Department of Biostatistics, Emory University</li>\n</ul>\n<ul>\n<li>Human Brain Mapping Conference, 
Quebec City</li>\n</ul>\n\n\r\n</div>\r\n\r\n\r\n<!-- section ends here -->\r\n\r\n\r\n</body>\r\n</html>\r\n", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=education", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\nfile not found", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=overview", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\nfile not found", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=news_and_events", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\nfile not found", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/R-Computing-TIMEDOMAIN.txt", "content": "\n## Install astsa (one time only)\n\n## Load astsa (every time you need to use this package)\n\n## To access all datasets in astsa\nastsadata()\n\n## These commands are taken from Shumway and Stoffer (2010)\n\n############### Global temperature data ####################\n\n## plot\npar(mfrow=c(1,1))\nplot(gtemp, type=\"o\", ylab=\"Global Temperature Deviations\")\n\n## fit a linear model\nfit <- lm(gtemp ~ time(gtemp)) ## regress gtemp on time\n## fit is an object that stores output from lm\nnames(fit)\nsummary(fit)\npar(mfrow=c(1,1))\nplot(gtemp, type=\"o\", ylab=\"Global Temperature Deviation\")\nabline(fit, col=2) ## add estimated regression line to the plot\n\n## examine the residuals\nresid = fit$resid;\npred = fit$fitted;\npar(mfrow=c(2,1)); \nplot(resid, type=\"o\", ylab=\"Residuals\", xlab=\"time\"); \nN = length(resid);\nzeroline = rep(0, N);\nlines(zeroline, col=2);\nplot(y=resid, x=pred, type=\"o\", ylab=\"Residuals\", xlab=\"predicted\"); \nlines(x=pred, y=zeroline, col=2);\n\npar(mfrow=c(2,1)); \nacf(resid);\npacf(resid); \n\n\n############### LA County Cardiac Mortality Data ##############\n\n## Plot the LA County mortality dataset\n## Series: cmort, temperature, particulate\npar(mfrow=c(3,1))\nplot(cmort, main=\"Cardiovascular Mortality\", xlab=\"\", ylab=\"\"); \nplot(tempr, main=\"Temperature\", xlab=\"\", ylab=\"\")\nplot(part, main=\"Particulates\", xlab=\"\", ylab=\"\")\n\ndev.new()\npairs(cbind(Mortality=cmort, Temperature=tempr, Particulates=part))\n\n##### Fit a model for cardiac mortality\ntemp= tempr-mean(tempr) ## center temperature \ntemp2 = temp^2 ## square of temp\ntrend = time(cmort) ## time\nfit = lm(cmort~trend + temp + temp2 + part, na.action=NULL)\nsummary(fit) ## regression results\nsummary(aov(fit)) ## ANOVA table (compare to next line) \nsummary(aov(lm(cmort~cbind(trend, temp, temp2, part)))) ## Table 2.1 \n\n## Illustration on how to compute the 
information criteria\nnum = length(cmort) ## sample size\nAIC(fit)/num- log(2*pi) ## AIC\nAIC(fit, k=log(num))/num- log(2*pi) ## BIC\n(AICc = log(sum(resid(fit)^2)/num)+ (num+5)/(num-5-2)) ## AICc\n\n## Example on smoothing/filtering\nma5 = filter(cmort, sides=2, rep(1,5)/5)\nma53 = filter(cmort, sides=2, rep(1,53)/53) \npar(mfrow=c(1,1));\nplot(cmort, type=\"p\", ylab=\"mortality\") \nlines(ma5,col=2)\nlines(ma53,col=3)\n\n##### Fitting some polynomial and sinusoidal trends\nwk = time(cmort) - mean(time(cmort)) ## wk is essentially t/52 centered at zero\nwk2 = wk^2 \nwk3 = wk^3\ncs = cos(2*pi*wk)\nsn = sin(2*pi*wk)\nreg1 = lm(cmort ~ wk + wk2 + wk3, na.action=NULL)\nreg2 = lm(cmort ~ wk + wk2 + wk3 + cs + sn, na.action=NULL) \npar(mfrow=c(1,1));\nplot(cmort, type=\"p\", ylab=\"mortality\") \nlines(fitted(reg1), col=2)\nlines(fitted(reg2), col=3)\n\n## Compare the residual series for each model\nresid1 = reg1$resid\nresid2 = reg2$resid\npar(mfrow=c(2,1));\nplot(resid1)\nplot(resid2)\n\npar(mfrow=c(2,2)); \nacf(resid1)\nacf(resid2)\npacf(resid1)\npacf(resid2)\n\n\n###### Kernel smoothing (with Guassian kernel)\npar(mfrow=c(1,1));\nplot(cmort, type=\"p\", ylab=\"mortality\");\nsm1 = ksmooth(time(cmort), cmort, \"normal\", bandwidth=5/52);\nsm2 = ksmooth(time(cmort), cmort, \"normal\", bandwidth=2);\nlines(sm1, col=2); \nlines(sm2, col=3);\n\n\n#################### ARIMA ##########################\n\n##### Generating time series \n\n##### AR\nts1 = arima.sim(list(order=c(1,0,0), ar=0.9), n=100);\nts2 = arima.sim(list(order=c(1,0,0), ar=-0.9), n=100);\n\npar(mfrow=c(2,1)) ## in the expression below, -is a space and == is equal\nplot(ts1, ylab=\"TS\", xlab=\"Time\"); title(paste(\"AR(1) phi=+.9\"));\nplot(ts2, ylab=\"TS\", xlab=\"Time\"); title(paste(\"AR(1) phi=-.9\"));\n\n##### MA\nts1 = arima.sim(list(order=c(0,0,1), ma=c(0.5)), n=100);\nts2 = arima.sim(list(order=c(0,0,5), ma=c(1, 0.3, 0.2, 0.1, 0.05)), n=100);\n\npar(mfrow=c(2,1)) ## in the expression below, -is a 
space and == is equal\nplot(ts1, ylab=\"TS\", xlab=\"Time\"); title(paste(\"MA(1)\"));\nplot(ts2, ylab=\"TS\", xlab=\"Time\"); title(paste(\"MA(5)\"));\n\n\n##### Fitting ARIMA models to data\n\n## Data is generated by an AR(1) process\ndata1 = arima.sim(list(order=c(1,0,0), ar=0.9), n=100);\n\n## Fit ARMA(2,0,1) to data1\nfit1 = arima(data1, order=c(2,0,1))\n\n\n\n\n\n\n\n\n\n", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=people", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\n\n<h2>Principal Investigator</h2>\n<p><li><a href=\"http://www.ics.uci.edu/~hombao/hernando.html\">Hernando Ombao</a> (Statistics)</p>\n\n<h2>Students At UC-Irvine</h2>\n<p><li>Cristina Gorrostieta (Post-doctoral researcher, starting Summer 2012)</p>\n<p><li>Yuxiao Wang (PhD student, starting Fall 2012)</li></p>\n<p><li>Michael Wojnowicz (PhD student, starting Fall 2012)</li></p>\n<p><li>Zhe Yu (PhD student, since Fall 2011)</li></p>\n\n<h2>Faculty Collaborators at UC-Irvine</h2>\n\n<p><li><a href=\"http://www.ics.uci.edu/~babaks\">Babak Shahbaba</a> (Statistics)</p>\n<p><li>Steven Cramer (Neurology)</p>\n<p><li>Greg Hickok (Cognitive Sciences)</p>\n\n<h2>Former Students at Brown University</h2>\n<p><li>Mark Fiecas (now UC-San Diego)</p>\n<p><li>Cristina Gorrostieta (now UC-Irvine)</p>\n<p><li>Hakmook Kang (now Vanderbilt Univ)</p>\n\n<h2>Other Collaborators</h2>\n\n<p><li>John Aston (Warwick Univ, UK)</li></p>\n<p><li><a href=\"http://math.fullerton.edu/sbehseta/\">Sam Behseta</a> (Cal State Univ at Fullerton)</li></p>\n<p><li>Claudia Kirch (Karlsruhe Inst Technology, Germany)</p>\n<p><li>Devin Koestler (Dartmouth Univ)</p>\n<p><li>Giovanni Motta (Columbia Univ)</p>\n<p><li>Birte Muhsal (Karlsruhe Inst Technology, Germany)</p>\n<p><li>Sofia Olhede (Univ College London, UK)</p>\n<p><li>Raquel Prado (UC-Santa Cruz)</p>\n<p><li>Wesley Thompson (UC-San Diego)</p>\n\n\n\n\n\r\n</div>\r\n\r\n\r\n<!-- section ends here -->\r\n\r\n\r\n</body>\r\n</html>\r\n", "encoding": "ascii"}
|
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; margin-right:20px;\">\r\n\r\n\r\n\r\n\n<h2>WHAT 
WE DO</h2>\n\n\n<p><li>We develop novel statistical methods and models for analyzing massive spatio-temporal data with complex dependence structures.</li></p>\n\n\n<p><li> We collaborate with scientists on study design, modeling and analysis of space-time data arising from various fields such as neuroscience, neurology, psychiatry, sociology and epidemiology. </li></p>\n\n<p><li> Through collaborative projects, we engage undergraduate and graduate students in all phases of inter-disciplinary research including model formulation, implementation and presentation of results. </li></p>\n\n\n<h3>ANNOUCEMENT</h3>\n\n<p><li>NSF-Funded Workshop on <a href=\"http://www.ics.uci.edu/~hombao/neurostatsw2012.html\">Developing Novel Statistical Methods for NeuroImaging</a></li></p>\n\n<p></BR></p>\n\n<h3>For pre-prints of papers and computer codes contact</h3>\n<p><a href=\"http://www.ics.uci.edu/~hombao/hernando.html\">Hernando Ombao, Ph.D.</a></p>\n<p><a href=\"http://www.stat.uci.edu\">Department of Statistics</a></p>\n<p><a href=\"http://www.uci.edu\">University of California at Irvine</a></p>\n<p>Irvine, CA 92697</p>\n<p>EMAIL: hombao AT uci DOT edu</p>\n<p>PHONE: (949) 824-5679</p>\n\n\n\r\n</div>\r\n\r\n\r\n<!-- section ends here -->\r\n\r\n\r\n</body>\r\n</html>\r\n", "encoding": "ascii"}
|
File diff suppressed because one or more lines are too long
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=courses", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\nfile not found", "encoding": "ascii"}
|
File diff suppressed because one or more lines are too long
@ -0,0 +1 @@
|
|||||||
|
{"url": "https://hombao.ics.uci.edu/?s=opportunities", "content": "<!DOCTYPE html>\r\n<html>\r\n<body>\r\n<head>\r\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\" />\r\n<meta name=\"description\" content=\"We develop novel statistical methods and models for analyzing complex spatio-temporal data.\" />\r\n<meta name=\"keywords\" content=\"spatiotemporal, biostatistics, research group\" />\r\n<meta name=\"author\" content=\"Hernando Ombao\" />\r\n<title>Space-Time Modeling</title>\r\n<link href=\"main.css\" rel=\"stylesheet\" type=\"text/css\" />\r\n\r\n\r\n<div id=\"header\" style=\"width:100%;\">\r\n<img src=\"logo.gif\">\r\n</div>\r\n\r\n<div id=\"footer\" style=\"width:100%;\">\r\n<a href=\"mailto:hombao@uci.edu\"><img src=\"contact.gif\" style=\"border:0\" /></a>\r\n</div>\r\n\r\n<div id=\"menu\">\r\n<HEAD>\r\n\r\n <LINK HREF=\"menu.css\" REL=\"stylesheet\" TYPE=\"text/css\" />\r\n\r\n</HEAD>\r\n\r\n\r\n\r\n<UL ID=\"nav\">\r\n\r\n\t<LI><A HREF=\"?s=people\">People</A></LI>\r\n\r\n\t<LI><A HREF=\"?s=research\">Research</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=overview\">Overview</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=projects\">Projects</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=publications\">Publications</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=grants\">Grants</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n\t<LI><a href=\"?s=education\">Education</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=courses\">Courses</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=journal\">Journal Club</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=opportunities\">Opportunities for Students</A></LI>\r\n\t\t</ul>\r\n\t</li>\r\n\r\n\t<LI><a href=\"?s=news_and_events\">News & Events</A>\r\n\t\t<UL>\r\n\t\t\t<LI><A HREF=\"?s=news\">News</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=recent\">Recent Presentations</A></LI>\r\n\t\t\t<LI><A HREF=\"?s=upcoming\">Upcoming Presentations</A></LI>\r\n\t\t</UL>\r\n\t</LI>\r\n\r\n</UL>\r\n\r\n\r\n\r\n</div>\r\n\r\n<div id=\"body\" style=\"margin-left:20px; 
margin-right:20px;\">\r\n\r\n\r\n\r\nfile not found", "encoding": "ascii"}
|
116
test_merge.py
Normal file
116
test_merge.py
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
import json
|
||||||
|
from posting import Posting
|
||||||
|
import math
|
||||||
|
import sys
|
||||||
|
import random
|
||||||
|
from nltk.corpus import words
|
||||||
|
# Pool of small integers (1..10) that random_posting() draws every field from.
random_list = list(range(1, 11))

# Vocabulary for the generated indexes: the NLTK `words` corpus, shuffled
# once at import time.
test_data = words.words()
random.shuffle(test_data)
|
||||||
|
|
||||||
|
|
||||||
|
def random_posting(id):
    """Return a Posting for `id` whose remaining fields are random.

    Every random field is drawn from the module-level `random_list`: two
    scalar values followed by a list of eight values.
    NOTE(review): the meaning of each Posting field is defined in
    posting.py, which is not visible here -- confirm the argument order.
    """
    # Draw in the same order the positional arguments are evaluated, so a
    # seeded run produces the same posting as before.
    first_value = random.choice(random_list)
    second_value = random.choice(random_list)
    value_list = [random.choice(random_list) for _ in range(8)]
    return Posting(id, first_value, second_value, value_list)
|
||||||
|
|
||||||
|
class Node():
    """One index entry: a term and the postings recorded for it.

    Instances are serialised through ``__dict__``, so the attribute names
    are the JSON keys ("index_value", "postings").
    """

    def __init__(self, index_value='Something', postings=None):
        # Attributes live on the instance: a class-level ``postings = list()``
        # would be a single list shared by every Node.
        self.index_value = index_value
        self.postings = [] if postings is None else postings
|
||||||
|
|
||||||
|
class Index():
    """Directory of an on-disk index file.

    ``index`` holds (term, file offset) pairs and ``length`` the number of
    pairs. Instances are serialised through ``__dict__``.
    """

    def __init__(self, index=None, length=0):
        # Attributes live on the instance: a class-level ``index = list()``
        # would be a single list shared by every Index.
        # "index" is assigned before "length" so the serialised key order
        # matches what this module's writers already produce.
        self.index = [] if index is None else index
        self.length = length
|
||||||
|
|
||||||
|
def random_partial_index(name, num_terms=1000, postings_per_term=10):
    """Write a randomly generated partial index to disk.

    Two files are produced:

    * ``<name>.partial`` -- one JSON-encoded Node per line.
    * ``<name>.index``   -- a JSON-encoded Index whose ``index`` list holds
      (term, offset of that term's line in ``<name>.partial``) pairs sorted
      by term, and whose ``length`` is the number of pairs.

    Terms are drawn with ``random.choice`` from ``test_data`` and
    lower-cased, so the same term may appear more than once.

    Args:
        name: path prefix shared by both output files.
        num_terms: number of Node lines to write (default 1000).
        postings_per_term: postings generated per term, with ids
            0..postings_per_term-1 (default 10).
    """
    part_index = Index()
    part_index.index = list()
    part_index.length = 0

    with open(name + '.partial', 'w') as partial_file:
        for _ in range(num_terms):
            node = Node()
            node.index_value = random.choice(test_data).lower()
            node.postings = [random_posting(posting_id)
                             for posting_id in range(postings_per_term)]

            # Posting objects are not JSON-native; fall back to their
            # attribute dict.
            json_line = json.dumps(node, default=lambda o: o.__dict__, sort_keys=False)

            # Record the offset *before* writing so a reader can seek()
            # straight to the start of this line.
            part_index.index.append((node.index_value, partial_file.tell()))
            partial_file.write(json_line + '\n')
            part_index.length = part_index.length + 1

    part_index.index.sort(key=lambda entry: entry[0])
    with open(name + '.index', 'w') as index_file:
        index_file.write(json.dumps(part_index, default=lambda o: o.__dict__, sort_keys=False))
|
||||||
|
|
||||||
|
def merge(partial_indices, output_name="merged_index"):
    """Merge several on-disk partial indexes into a single index.

    Each name in `partial_indices` refers to a pair of files:
    ``<name>.partial`` (one JSON node per line, with "index_value" and
    "postings" keys) and ``<name>.index`` (JSON with an "index" list of
    [term, offset] pairs sorted by term and a "length" count).

    The merge walks all directories in parallel, repeatedly taking the
    smallest pending term, concatenating its postings from every partial
    index that holds it, sorting them by "doc_id", and appending one line
    to ``<output_name>.full``. The directory of the merged file is written
    to ``<output_name>.index`` in the same format as the inputs.

    Args:
        partial_indices: path prefixes of the partial indexes to merge.
        output_name: path prefix of the merged output files
            (default "merged_index").
    """
    # Local import: keeps this block self-contained without touching the
    # file's import section.
    from contextlib import ExitStack

    # Load every directory up front; they are small compared to the postings.
    directories = []
    for name in partial_indices:
        with open(name + '.index', 'r') as index_file:
            directories.append(json.load(index_file))

    merged_entries = []
    cursors = [0] * len(partial_indices)

    # ExitStack closes every partial file and the output file even if the
    # merge fails half-way, so no handle leaks and the output is flushed.
    with ExitStack() as stack:
        partial_files = [
            stack.enter_context(open(name + '.partial', 'r'))
            for name in partial_indices
        ]
        merged_file = stack.enter_context(open(output_name + '.full', 'w'))

        while True:
            # Smallest term not yet consumed across all partial indexes.
            pending = [
                directory['index'][cursor][0]
                for directory, cursor in zip(directories, cursors)
                if cursor < directory['length']
            ]
            if not pending:
                break
            term = min(pending)

            postings = []
            for i, directory in enumerate(directories):
                entries = directory['index']
                # 'while', not 'if': a term repeated inside one partial
                # index is folded into the same merged node.
                while cursors[i] < directory['length'] and entries[cursors[i]][0] == term:
                    partial_files[i].seek(entries[cursors[i]][1], 0)
                    postings += json.loads(partial_files[i].readline())['postings']
                    cursors[i] += 1

            postings.sort(key=lambda posting: posting['doc_id'])
            # Offset is taken before the write so it points at the line start.
            merged_entries.append((term, merged_file.tell()))
            merged_file.write(
                json.dumps({'index_value': term, 'postings': postings}) + '\n')

    merged_entries.sort(key=lambda entry: entry[0])
    with open(output_name + '.index', 'w') as index_file:
        index_file.write(
            json.dumps({'index': merged_entries, 'length': len(merged_entries)}))
|
165
worker.py
165
worker.py
@ -1,114 +1,129 @@
|
|||||||
from threading import Thread
|
from threading import Thread
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import shelve
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from time import perf_counter
|
|
||||||
import time
|
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
|
||||||
#Data process
|
#Data process
|
||||||
from nltk.tokenize import word_tokenize
|
from nltk.tokenize import word_tokenize
|
||||||
from nltk.stem import PorterStemmer
|
from nltk.stem import PorterStemmer
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
from collections import Counter
|
|
||||||
|
|
||||||
from posting import Posting
|
from posting import Posting
|
||||||
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
class Node():
    """One index entry: a term and the postings recorded for it.

    Instances are serialised through ``__dict__``, so the attribute names
    are the JSON keys ("index_value", "postings").
    """

    def __init__(self, index_value='', postings=None):
        # Attributes live on the instance: a class-level ``postings = list()``
        # would be a single list shared by every Node.
        self.index_value = index_value
        self.postings = [] if postings is None else postings
|
||||||
|
|
||||||
|
class Index():
    """Directory of an on-disk partial index file.

    ``length`` is the number of entries and ``index`` the list of
    (term, file offset) pairs. Instances are serialised through ``__dict__``.
    """

    def __init__(self, length=0, index=None):
        # Attributes live on the instance: a class-level ``index = list()``
        # would be a single list shared by every Index.
        # "length" is assigned before "index" so the serialised key order
        # matches what this module's writer already produces.
        self.length = length
        self.index = [] if index is None else index
|
||||||
|
|
||||||
class Worker(Thread):
|
class Worker(Thread):
|
||||||
def __init__(self,indexer,target):
|
def __init__(self,worker_id,indexer):
|
||||||
self.file = target
|
|
||||||
self.indexer = indexer
|
self.indexer = indexer
|
||||||
|
self.stemmer = PorterStemmer()
|
||||||
|
self.worker_id = worker_id
|
||||||
|
self.num_partial = 0
|
||||||
|
self.index = dict()
|
||||||
super().__init__(daemon=True)
|
super().__init__(daemon=True)
|
||||||
|
|
||||||
|
def dump(self):
    """Flush the in-memory index to disk as one partial index.

    Writes ``<worker_id>_<num_partial>.partial`` (one JSON node per line)
    and ``<worker_id>_<num_partial>.index`` (JSON directory of
    (term, offset) pairs sorted by term, plus the entry count). It then
    reports the name to the indexer, advances the partial counter and
    starts a fresh in-memory index.
    """
    # Name of the partial being written now; the same string is later
    # handed to the indexer so it refers to files that exist.
    partial_name = str(self.worker_id) + "_" + str(self.num_partial)

    part_index = Index()
    part_index.length = 0
    part_index.index = list()

    # 'with' closes (and therefore flushes) the file before anyone is told
    # that the partial index exists.
    with open(partial_name + '.partial', 'w') as partial_file:
        for key, postings in self.index.items():
            node = Node()
            node.index_value = key
            node.postings = postings

            # Posting objects are not JSON-native; fall back to their
            # attribute dict.
            jsonStr = json.dumps(node, default=lambda o: o.__dict__, sort_keys=False)

            # Offset is taken before the write so it points at the line start.
            part_index.index.append((key, partial_file.tell()))
            partial_file.write(jsonStr + '\n')
            part_index.length = part_index.length + 1

    part_index.index.sort(key=lambda y: y[0])
    with open(partial_name + '.index', 'w') as index_file:
        index_file.write(json.dumps(part_index, default=lambda o: o.__dict__, sort_keys=False))

    # Register first, then advance: incrementing beforehand would register
    # "<id>_<n+1>", a name for which no file has been written.
    # NOTE(review): add_partial_index is defined on the indexer, outside
    # this view -- presumably it records the name for the later merge.
    self.indexer.add_partial_index(partial_name)
    self.num_partial = self.num_partial + 1

    # Start the next partial from empty; otherwise every later dump would
    # repeat all postings already written.
    self.index = dict()
|
||||||
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
print("Target: " + str(self.file))
|
while True:
|
||||||
ticker = perf_counter()
|
target = self.indexer.get_next_file()
|
||||||
tic = perf_counter()
|
if not target:
|
||||||
file_load = open(self.file)
|
self.dump()
|
||||||
|
print("Worker " + str(self.worker_id) + " died")
|
||||||
|
break
|
||||||
|
file_load = open(target)
|
||||||
data = json.load(file_load)
|
data = json.load(file_load)
|
||||||
soup = BeautifulSoup(data["content"],features="lxml")
|
soup = BeautifulSoup(data["content"],features="lxml")
|
||||||
words = word_tokenize(soup.get_text())
|
doc_id = target[target.rfind('/')+1:-5]
|
||||||
toc = perf_counter()
|
url = data['url']
|
||||||
if toc - tic > 1 :
|
print("Worker " + str(self.worker_id) + " working on " + url)
|
||||||
print("Took " + str(toc - tic) + "seconds to tokenize text !")
|
|
||||||
|
|
||||||
tokenized_words = list()
|
|
||||||
stemmed_words = list()
|
|
||||||
|
|
||||||
important = {'b' : [], 'h1' : [], 'h2' : [], 'h3' : [], 'title' : []}
|
important = {'b' : [], 'h1' : [], 'h2' : [], 'h3' : [], 'title' : []}
|
||||||
for key_words in important.keys():
|
for key_words in important.keys():
|
||||||
for i in soup.findAll(key_words):
|
for i in soup.findAll(key_words):
|
||||||
for word in word_tokenize(i.text):
|
for word in word_tokenize(i.text):
|
||||||
important[key_words].append(self.indexer.stemmer.stem(word))
|
important[key_words].append(self.stemmer.stem(word))
|
||||||
|
|
||||||
tic = perf_counter()
|
# Gets a cleaner version of the text compared to soup.get_text()
|
||||||
for word in words:
|
clean_text = ' '.join(soup.stripped_strings)
|
||||||
if word != "" and re.fullmatch('[A-Za-z0-9]+',word):
|
# Looks for large white space, tabbed space, and other forms of spacing and removes it
|
||||||
tokenized_words.append(word)
|
# Regex expression matches for space characters excluding a single space or words
|
||||||
toc = perf_counter()
|
clean_text = re.sub(r'\s[^ \w]', '', clean_text)
|
||||||
if toc - tic > 1 :
|
# Tokenizes the text and joins it back into a single string. Keeping it as a single string is essential for get_tf_idf to work as intended
|
||||||
print("Took " + str(toc - tic) + "seconds to isalnum text !")
|
clean_text = " ".join([i for i in clean_text.split() if i != "" and re.fullmatch('[A-Za-z0-9]+', i)])
|
||||||
|
# Stems tokenized text
|
||||||
|
clean_text = " ".join([self.stemmer.stem(i) for i in clean_text.split()])
|
||||||
|
# Put clean_text as an element in a list because get_tf_idf works properly with single-element lists
|
||||||
|
|
||||||
tic = perf_counter()
|
tokens = word_tokenize(clean_text)
|
||||||
for word in tokenized_words:
|
|
||||||
stemmed_words.append(self.indexer.stemmer.stem(word))
|
|
||||||
|
|
||||||
toc = perf_counter()
|
#counter(count,positionals)
|
||||||
if toc - tic > 1 :
|
|
||||||
print("Took " + str(toc - tic) + "seconds to stemmed text !")
|
|
||||||
|
|
||||||
"""
|
counter = dict()
|
||||||
tfidf = TfidfVectorizer(ngram_range=(1,3)) # ngram_range is range of n-values for different n-grams to be extracted (1,3) gets unigrams, bigrams, trigrams
|
#We calculating tf_raw, and positionals here
|
||||||
tfidf_matrix = tfidf.fit_transform(stemmed_words) # fit trains the model, transform creates matrix
|
for i in range(len(tokens)):
|
||||||
#df = pd.DataFrame(tfidf_matrix.toarray(), columns = tfidf.get_feature_names_out()) # store value of matrix to associated word/n-gram
|
word = tokens[i]
|
||||||
tfidf.sget_feature_names_out()
|
if word in counter:
|
||||||
#tf_idf_dict = df.to_dict() # transform dataframe to dict *could be expensive the larger the data gets, tested on ~1000 word doc and took 0.002 secs to run
|
counter[word][0] = counter[word][0] + 1
|
||||||
|
counter[word][1].append(i)
|
||||||
|
else:
|
||||||
|
counter[word] = [1,list()]
|
||||||
|
counter[word][1].append(i)
|
||||||
|
|
||||||
print(tfidf_matrix)
|
doc_length = len(tokens)
|
||||||
"""
|
for index in counter:
|
||||||
|
if index in self.index:
|
||||||
|
postings = self.index[index]
|
||||||
|
postings.append(Posting(doc_id,url,counter[index][0]/doc_length,0,counter[index][1]))
|
||||||
|
else:
|
||||||
|
self.index[index] = list()
|
||||||
|
self.index[index].append(Posting(doc_id,url,counter[index][0]/doc_length,0,counter[index][1]))
|
||||||
|
self.index[index].sort(key=lambda y:y.doc_id)
|
||||||
|
|
||||||
tfIdfVectorizer=TfidfVectorizer(use_idf=True)
|
#10 Megabytes index (in Ram approx)
|
||||||
tfIdf = tfIdfVectorizer.fit_transform(stemmed_words)
|
if sys.getsizeof(self.index) > 500000:
|
||||||
df = pd.DataFrame(tfIdf[0].T.todense(), index=tfIdfVectorizer.get_feature_names_out(), columns=["TF-IDF"])
|
self.dump()
|
||||||
df = df.sort_values('TF-IDF', ascending=False)
|
|
||||||
|
|
||||||
print(df.head(25))
|
|
||||||
|
|
||||||
for word in tf_idf_dict.keys():
|
|
||||||
tic = perf_counter()
|
|
||||||
print(tf_idf_dict)
|
|
||||||
weight = 1.0
|
|
||||||
for k,v in important.items():
|
|
||||||
if k == 'b' and word in v:
|
|
||||||
weight = 1.2
|
|
||||||
elif k == 'h1' and word in v:
|
|
||||||
weight = 1.75
|
|
||||||
elif k == 'h2' and word in v:
|
|
||||||
weight = 1.5
|
|
||||||
elif k == 'h3' and word in v:
|
|
||||||
weight = 1.2
|
|
||||||
elif k == 'title' and word in v:
|
|
||||||
weight = 2
|
|
||||||
|
|
||||||
posting = Posting(data["url"],tf_idf_dict[word]*weight)
|
|
||||||
|
|
||||||
toc = perf_counter()
|
|
||||||
if toc - tic > 1 :
|
|
||||||
print("Took " + str(toc - tic) + "seconds to tf_idf text !")
|
|
||||||
|
|
||||||
tic = perf_counter()
|
|
||||||
self.indexer.save_index(word,posting)
|
|
||||||
toc = perf_counter()
|
|
||||||
if toc - tic > 1 :
|
|
||||||
print("Took " + str(toc - tic) + "seconds to save text !")
|
|
||||||
|
|
||||||
tocker = perf_counter()
|
|
||||||
print("Finished " + data['url'] + "\n" + str(tocker-ticker))
|
|
||||||
|
Loading…
Reference in New Issue
Block a user