diff --git a/indexer.py b/indexer.py
index c3cb9ce..b1ed4c6 100644
--- a/indexer.py
+++ b/indexer.py
@@ -17,6 +17,7 @@ from bs4 import BeautifulSoup
 from time import perf_counter
 import time
 import threading
+from threading import Lock
 
 #Data process
@@ -34,43 +35,134 @@ from worker import Worker
 
 class Indexer():
-    def __init__(self,restart):
+    def __init__(self,restart,list_partials,weight,data_paths,worker_factory=Worker):
         #Config stuffs
-        self.path = "data/DEV/"
+        self.path = "test/"
         self.restart = restart
-
-    def get_data(self):
-        num_threads = 1
-        threads = list()
+        self.list_partials = list_partials
+        self.weight = weight
+        self.data_paths = data_paths
+        self.data_paths_lock = Lock()
+        self.list_partials_lock = Lock()
+        self.workers = list()
+        self.worker_factory = worker_factory
+
+    def start_async(self):
+        self.workers = [
+            self.worker_factory(worker_id,self)
+            for worker_id in range(8)]
+        for worker in self.workers:
+            worker.start()
+
+    def start(self):
+        self.start_async()
+        self.join()
+
+    def join(self):
+        for worker in self.workers:
+            worker.join()
+
+    def get_data_path(self):
         for directory in os.listdir(self.path):
             for file in os.listdir(self.path + "/" + directory + "/"):
-                while True:
-                    file_path = self.path + "" + directory + "/"+file
-                    if len(threads) < num_threads:
-                        thread = Worker(self,file_path)
-                        threads.append(thread)
-                        thread.start()
-                        break
-                    else:
-                        if not threads[index].is_alive():
-                            threads[index] = Worker(self,file_path)
-                            threads[index].start()
-                            break
-                        else:
-                            index = index + 1
-                            if(index >= num_threads):
-                                index = 0
-                                time.sleep(.1)
+                #Build paths from self.path so the stored paths match the
+                #directory tree that was actually walked
+                self.data_paths.append(self.path + directory + "/" + file)
+
+    def get_next_file(self):
+        with self.data_paths_lock:
+            try:
+                return self.data_paths.pop()
+            except IndexError:
+                return None
+
+    def add_partial_index(self,partial_index):
+        with self.list_partials_lock:
+            self.list_partials.append(partial_index)
+
+    #Found 55770 documents
+
+    def merge(self):
+        partial_files = list()
+        partial_index_files = list()
+        partial_index_indices = list()
+        merged_index = open("merged_index.full",'w')
+        num_indices = len(self.list_partials)
+
+        #Full Index.Index and Length
+        full_index = Index()
+        full_index.index = list()
+        full_index.length = 0
+
+        for partial_index in self.list_partials:
+            file = open(partial_index+'.partial','r')
+            partial_files.append(file)
+            index = open(partial_index+'.index','r')
+            partial_index_files.append(index)
+
+        #Load each partial's (term, byte offset) table
+        for partial_index_file in partial_index_files:
+            partial_index_file.seek(0,0)
+            partial_index_indices.append(json.loads(partial_index_file.readline()))
+
+        #Start all indexes at 0
+        for partial_file in partial_files:
+            partial_file.seek(0,0)
+
+        pointers = [0]*num_indices
+
+        while True:
+            #Get the current term from every partial index to find the min
+            values = list()
+            for i in range(num_indices):
+                if pointers[i] < partial_index_indices[i]['length']:
+                    values.append(partial_index_indices[i]['index'][pointers[i]][0])
+
+            #Every pointer has run off the end of its partial index
+            if len(values) == 0:
+                break
+            value = min(values)
+
+            #Pull the postings for the min term from every partial index that
+            #has it, then append the combined node to the merged index
+            node = Node()
+            node.index_value = value
+            for i in range(num_indices):
+                if pointers[i] < partial_index_indices[i]['length'] and partial_index_indices[i]['index'][pointers[i]][0] == value:
+                    to_seek = partial_index_indices[i]['index'][pointers[i]][1]
+                    partial_files[i].seek(to_seek,0)
+                    json_value = partial_files[i].readline()
+                    temp_node = json.loads(json_value)
+                    node.postings = node.postings + temp_node['postings']
+                    pointers[i] = pointers[i] + 1
+
+            node.postings.sort(key=lambda y:y['doc_id'])
+            full_index.index.append((value,merged_index.tell()))
+            full_index.length = full_index.length + 1
+            jsonStr = json.dumps(node,default=lambda o: o.__dict__,sort_keys=False)
+            merged_index.write(jsonStr + '\n')
+
+        full_index.index.sort(key=lambda y:y[0])
+        jsonStr = json.dumps(full_index,default=lambda o: o.__dict__,sort_keys=False)
+        with open("merged_index.index",'w') as f:
+            f.write(jsonStr)
+
+        #Close every file handle opened above
+        merged_index.close()
+        for file in partial_files + partial_index_files:
+            file.close()
 
-    #getting important tokens
 def main():
-    indexer = Indexer(True,0)
-    indexer.get_data()
+    indexer = Indexer(True,list(),list(),list())
+    indexer.get_data_path()
+    indexer.start()
+    indexer.merge()
 
 if __name__ == "__main__":
     main()
\ No newline at end of file
diff --git a/merged_index.full b/merged_index.full
new file mode 100644
index 0000000..e69de29
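Review note: worker.py is not part of this diff, but the new wiring pins down the interface the Indexer expects: worker_factory(worker_id, indexer) must build a Thread-like object that pulls paths via get_next_file() until it returns None and hands finished partial-index names back through add_partial_index(). A minimal sketch under those assumptions; build_partial_index is a hypothetical placeholder, not the real worker logic:

    from threading import Thread

    class Worker(Thread):
        def __init__(self, worker_id, indexer):
            super().__init__()
            self.worker_id = worker_id
            self.indexer = indexer

        def run(self):
            while True:
                path = self.indexer.get_next_file()
                if path is None:
                    break  # shared path list drained; Indexer.join() can return
                # Hypothetical helper: tokenize the file and write a partial index
                name = self.build_partial_index(path)
                self.indexer.add_partial_index(name)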
diff --git a/posting.py b/posting.py
index 4edf6c6..898a5c2 100644
--- a/posting.py
+++ b/posting.py
@@ -1,11 +1,16 @@
 #Posting class for indexer, will probably be more complex as we keep adding crap to it
 
 class Posting():
-    def __init__(self,doc_id,tf_raw,tf_idf,positionals):
+    def __init__(self,doc_id,url,tf_raw,tf_idf,positionals):
         self.doc_id = doc_id
+        self.url = url
         self.tf_raw = tf_raw
         self.tf_idf = tf_idf
         self.positionals = positionals
 
+    def __repr__(self):
+        return "Doc_id:" + str(self.doc_id) + " URL:" + self.url + " tf_raw:" + str(self.tf_raw) + " tf_idf:" + str(self.tf_idf) + " positionals:" + str(self.positionals)
+
+    def __str__(self):
+        return self.__repr__()
 
     def comparator(self):
         #Some custom comparator for sorting postings later
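Review note: a quick sanity check of the new url field and __repr__, with made-up values:

    from posting import Posting

    p = Posting(42, "https://aiclub.ics.uci.edu/", 0.03, 0, [1, 5, 9])
    print(p)
    # Doc_id:42 URL:https://aiclub.ics.uci.edu/ tf_raw:0.03 tf_idf:0 positionals:[1, 5, 9]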
diff --git a/test.py b/test.py
index f2d8011..361c23c 100644
--- a/test.py
+++ b/test.py
@@ -1,115 +1,59 @@
 import json
-from posting import Posting
-import math
 import sys
-import random
-from nltk.corpus import words
+from bs4 import BeautifulSoup
+from nltk.stem import PorterStemmer
+import nltk
+from posting import Posting
+import re
 
-random_list = [1,2,3,4,5,6,7,8,9,10]
-test_data = words.words()
-random.shuffle(test_data)
+self_index = dict()
+stemmer = PorterStemmer()
+#Point the smoke test at the fixture checked in under test/
+target = 'test/aiclub_ics_uci_edu/8ef6d99d9f9264fc84514cdd2e680d35843785310331e1db4bbd06dd2b8eda9b.json'
+file_load = open(target)
+data = json.load(file_load)
+doc_id = target[target.rfind('/')+1:-5]
+url = data['url']
+soup = BeautifulSoup(data["content"],features="lxml")
+# Gets a cleaner version of the text than soup.get_text()
+clean_text = ' '.join(soup.stripped_strings)
+# Strips tabs, runs of whitespace, and other spacing characters;
+# the regex matches space characters other than a single space or a word character
+clean_text = re.sub(r'\s[^ \w]', '', clean_text)
+# Tokenizes the text and joins it back into one string;
+# keeping it a single string is essential for get_tf_idf to work as intended
+clean_text = " ".join([i for i in clean_text.split() if i != "" and re.fullmatch('[A-Za-z0-9]+', i)])
+# Stems the tokenized text
+clean_text = " ".join([stemmer.stem(i) for i in clean_text.split()])
 
-class Node():
-    index_value = ''
-    postings = list()
+tokens = nltk.word_tokenize(clean_text)
 
-class Index():
-    length = 0
-    index = list()
+#counter maps token -> [count, positions]
 
-def random_posting(id):
-    return Posting(id,random.choice(random_list),random.choice(random_list),[random.choice(random_list),random.choice(random_list),random.choice(random_list),random.choice(random_list),
-        random.choice(random_list),random.choice(random_list),random.choice(random_list),random.choice(random_list)])
+counter = dict()
+for i in range(len(tokens)):
+    word = tokens[i]
+    if word in counter:
+        counter[word][0] = counter[word][0] + 1
+        counter[word][1].append(i)
+    else:
+        counter[word] = [1,[i]]
+print(counter)
+doc_length = len(tokens)
+for index in counter:
+    if index in self_index:
+        postings = self_index[index]
+        postings.append(Posting(doc_id,url,counter[index][0]/doc_length,0,counter[index][1]))
+    else:
+        self_index[index] = list()
+        self_index[index].append(Posting(doc_id,url,counter[index][0]/doc_length,0,counter[index][1]))
 
-def random_partial_index(name):
-    part_index = Index()
-    part_index.index = list()
-    part_index.length = 0
-    with open(name +'.partial', 'w') as f:
-        for i in range(1000):
-            node1 = Node()
-            node1.index_value = random.choice(test_data).lower()
-            node1.postings = list()
-            for i in range(10):
-                node1.postings.append(random_posting(i))
-            jsonStr = json.dumps(node1, default=lambda o: o.__dict__,sort_keys=False)
-            part_index.index.append((node1.index_value,f.tell()))
-            f.write(jsonStr + '\n')
-            part_index.length = part_index.length + 1
-    part_index.index.sort(key=lambda y:y[0])
-    jsonStr =json.dumps(part_index, default=lambda o: o.__dict__,sort_keys=False)
-    with open(name + '.index','w') as f:
-        f.write(jsonStr)
+for index in self_index:
+    print(index + str(self_index[index]) + '\n')
 
-def merge(partial_indices):
-    partial_files = list()
-    partial_index_files = list()
-    parital_index_indices = list()
-    merged_index = open("merged_index.full",'w')
-    num_indices = len(partial_indices)
-
-    #Full Index.Index and Length
-    full_index = Index()
-    full_index.index = list()
-    full_index.length = 0
-
-    for partial_index in partial_indices:
-        file = open(partial_index+'.partial','r')
-        partial_files.append(file)
-        index = open(partial_index+'.index','r')
-        partial_index_files.append(index)
-
-    for partial_index_file in partial_index_files:
-        partial_index_file.seek(0,0)
-        parital_index_indices.append(json.loads(partial_index_file.readline()))
-
-    #Start all indexes at 0
-    for partial_file in partial_files:
-        partial_file.seek(0,0)
-
-    pointers = [0]*num_indices
-
-    while(True):
-        #Get all values from all indices to find min
-        value = None
-        values = list()
-        for i in range(num_indices):
-            if pointers[i] < parital_index_indices[i]['length']:
-                values.append(parital_index_indices[i]['index'][pointers[i]][0])
-
-        if(len(values) == 0):
-            break
-        value = min(values)
-
-        #Get data from the min value of all indices if exists then save to mergedIndex
-        if value == None:
-            print("I have crashed some how by not getting min value")
-            break
-
-        node = Node()
-        node.index_value = value
-        for i in range(num_indices):
-            if pointers[i] < parital_index_indices[i]['length'] and parital_index_indices[i]['index'][pointers[i]][0] == value:
-                to_seek = parital_index_indices[i]['index'][pointers[i]][1]
-                partial_files[i].seek(to_seek,0)
-                json_value = partial_files[i].readline()
-                temp_node = json.loads(json_value)
-                node.postings = node.postings + temp_node['postings']
-                pointers[i] = pointers[i] + 1
-
-        node.postings.sort(key=lambda y:y['doc_id'])
-        full_index.index.append((value,merged_index.tell()))
-        full_index.length = full_index.length + 1
-        jsonStr = json.dumps(node,default=lambda o: o.__dict__,sort_keys=False)
-        merged_index.write(jsonStr + '\n')
-
-    full_index.index.sort(key=lambda y:y[0])
-    jsonStr =json.dumps(full_index, default=lambda o: o.__dict__,sort_keys=False)
-    with open("merged_index.index" ,'w') as f:
-        f.write(jsonStr)
+print("The size of the dictionary is {} bytes".format(sys.getsizeof(self_index)))
\ No newline at end of file
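Review note: the k-way merge that test.py used to carry now lives in Indexer.merge() above. For comparison, the same seek-and-merge over the .partial/.index layout can be written around heapq, popping the min term from a heap instead of rescanning every pointer each pass. This is a sketch under the diff's file format, not a drop-in replacement; error handling and the Node/Index wrappers are elided:

    import heapq
    import json

    def merge(partial_names):
        # Load each partial's (term, byte_offset) table and open its postings file
        tables = []
        for name in partial_names:
            with open(name + '.index') as f:
                tables.append(json.load(f))
        files = [open(name + '.partial') for name in partial_names]

        # Heap of (term, partial_id, pointer), seeded with every first term
        heap = [(t['index'][0][0], i, 0) for i, t in enumerate(tables) if t['length'] > 0]
        heapq.heapify(heap)

        offsets = []  # (term, offset) pairs for the merged .index file
        with open('merged_index.full', 'w') as merged:
            while heap:
                term, i, p = heapq.heappop(heap)
                postings = []
                while True:
                    # Seek to this term's postings in partial i and consume them
                    files[i].seek(tables[i]['index'][p][1])
                    postings += json.loads(files[i].readline())['postings']
                    p += 1
                    if p < tables[i]['length']:
                        heapq.heappush(heap, (tables[i]['index'][p][0], i, p))
                    # Keep draining any other partials sitting on the same term
                    if not heap or heap[0][0] != term:
                        break
                    term, i, p = heapq.heappop(heap)
                postings.sort(key=lambda y: y['doc_id'])
                offsets.append((term, merged.tell()))
                merged.write(json.dumps({'index_value': term, 'postings': postings}) + '\n')
        for f in files:
            f.close()
        with open('merged_index.index', 'w') as f:
            json.dump({'length': len(offsets), 'index': offsets}, f)

Because terms are emitted in sorted order, the offsets list needs no final sort, unlike the pointer-scan version.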
diff --git a/test/aiclub_ics_uci_edu/8ef6d99d9f9264fc84514cdd2e680d35843785310331e1db4bbd06dd2b8eda9b.json b/test/aiclub_ics_uci_edu/8ef6d99d9f9264fc84514cdd2e680d35843785310331e1db4bbd06dd2b8eda9b.json
new file mode 100644
index 0000000..b4224f9
--- /dev/null
+++ b/test/aiclub_ics_uci_edu/8ef6d99d9f9264fc84514cdd2e680d35843785310331e1db4bbd06dd2b8eda9b.json
@@ -0,0 +1 @@
+{"url": "https://aiclub.ics.uci.edu/", "content": "\r\n\r\n\r\n\r\n    \r\n
\r\n\t\t\r\n \r\n \r\n \r\n\t\t\r\nWe hold workshops, meetings for the curious ones to learn the latest technology, espcially AI.
\r\n\t\t\t\t\t\t\t\tFrom professionals of the academia to almuni from the industry world, we got you guys connected through our unique seminars.
\r\n\t\t\t\t\t\t\t\tFor those who couldn't attend our meeting due to space constraints, don't worry! We have you covered.
Catch up with our YouTube Video (Linked below)
We hold workshops, meetings for the curious ones to learn the latest technology, espcially AI.
\r\n\t\t\t\t\t\t\t\tFrom professionals of the academia to almuni from the industry world, we got you guys connected through our unique seminars.
\r\n\t\t\t\t\t\t\t\tFor those who couldn't attend our meeting due to space constraints, don't worry! We have you covered.
Catch up with our YouTube Video (Linked below)
Professor Ihler is the advisor of AI@UCI.
Here are some areas that he has been working on.
\"I work in artificial intelligence and machine learning, focusing on statistical methods for learning from data and on approximate inference techniques for graphical models. Applications of my work include data mining and information fusion in sensor networks, computer vision and image processing, and computational biology.\"
We hold workshops, meetings for the curious ones to learn the latest technology, espcially AI.
\r\n\t\t\t\t\t\t\t\tFrom professionals of the academia to almuni from the industry world, we got you guys connected through our unique seminars.
\r\n\t\t\t\t\t\t\t\tFor those who couldn't attend our meeting due to space constraints, don't worry! We have you covered.
Catch up with our YouTube Video (Linked below)
Professor Ihler is the advisor of AI@UCI.
Here are some areas that he has been working on.
\"I work in artificial intelligence and machine learning, focusing on statistical methods for learning from data and on approximate inference techniques for graphical models. Applications of my work include data mining and information fusion in sensor networks, computer vision and image processing, and computational biology.\"
\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\t
\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\tThe Alderis project focuses on the application of Domain Specific Modeling Languages (DSMLs) and meta-modeling to specify a common semantic domain for the analysis of distributed real-time embedded (DRE) systems.\n\t\t\t\t\t\t\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\tThe Alderis language has both a visual and textual syntax with formally defined semantics. Alderis models can be verified directly by the open-source Distributed Real-time Embedded Analysis Method (DREAM) tool available for download at http://dre.sourceforge.net.\n\t\t\t\t\t\t\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\nComponent-based development is an emerging paradigm for the design of distributed real-time embedded (DRE) systems with hard QoS support. Components refer to reusable pieces of solutions, which can be configured and composed together to provide a service. Alderis plans to support this paradigm shift by providing a language and semantic domain for the model-driven development (MDD) of DRE systems.\n\t\t\t\t\t\t\n\t\t\t\t\t\t
\n\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\nThe Alderis language is specified using meta-modeling as shown in the figure below. We use the GME modeling environment to specify Alderis. The figure shows a part of the Alderis meta-model with its corresponding concrete syntax. The red arrows show how modeling elements and their relations are specified by the meta-model.\n\n\t\t\t\t\t\t
\n\n\n\t\t\t\t\t\t\nA key property of the Alderis language is that it has formally defined semantics allowing real-time verification using timed automata model checker tools such as UPPAAL and the Verimag IF toolset. The DREAM tool provides a way to automatically generate the timed automata models from the Alderis specification. DREAM also provides a way for random simulations that can be used efficiently to find bugs in designs that are too large and lead to state space explosions.\n\n\t\t\t\t\t\t
\n\n\t\t\t\tThis section illustrates the use of the Alderis language in a case study from the domain of avionics distributed real-time embedded (DRE) systems. Figure 1 shows the component-based architecture of the system, which is built upon the Boeing Bold Stroke real-time middleware. This application is deployed on a non-preemptive multiprocessor platform. As shown in Figure 1, this application is driven by five Timer components deployed on five CPUs.
\n\n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t
\nComputations on different processors are driven by their respective\ntimers. Components, however, do not necessarily execute with the\ntimer's rate, as seen in the NAV_DISPLAY component's case. It\nis executed more often to serve remote requests than to serve local\nrequests on CPU_3.\n\n\t\t\t\t\t\t\t
\nFigure 3 shows how we modeled the system in the Uppaal model checker tool. The application consits of 11 Task\ncomponents and 11 event channels, which 5 are local and used only for\nbuffering. The application is deployed on 5 processors. We have to model event channels explicitly (1)\nwhen we have to buffer events or (2) on remote event channels which\nhave measureable delays. All the event channels satisfy one of the\nabove conditions, except the timer's event channels that have been\nabstracted out in the model.\n\t\t\t\t\t\t\t
\n\n\t\t\t\t\t\t\t\nThe scheduling policies are represented by Schedulers in the\nDRE Semantic Domain. Since the Bold Stroke application is\ndeployed on a 5-processor architecture we define 5 schedulers as shown\non Figure 3. The schedulers get more complex\naccording to the scheduling policies. The automatic generation of the\nmodels provides a safe way to ensure the correct guard conditions and\nassignments. The timed automata model shown in Figure 3 corresponding \nto the Bold Stroke system shown in Figure 1 has been shown \nto be schedulable.\n\t\t\t\t\t\t\t
\n\n\t\t\t\t\t\t\t\t
\n\t\t\t\t\t\t\tThe EMSOFT publication cited on the left describes an ambiguity in the final version of the AMBA AHB specification. We do not claim in any way that the AMBA AHB protocol is incorrect, nor do we claim that it contains irresolvable contradictions. However, the AMBA AHB specification does not mention this problem and therefore it is up to the designers to find and resolve this and other special cases, and these implementations may not work with each other even though they all correspond to the specification. This shows the need to verify even simple MPSoC designs rather than rely on protocols themselves in general as they do not enforce a correct working system. These problems could be easily overcome by providing a formal specification rather than natural languages that are prone to ambiguities.\n\t\t\t\t\t\t
\n\n\t\t\t\t\t\t\n\t\t\t\t\t\t\tThe following example demonstrates how the simultaneous use of the HRESP=RETRY response and the HSPLITx unmask request in the same clock cycle by the slave may result in a deadlock situation:\n\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\tThe following example shows that disallowing the simultaneous use of the HRESP=RETRY response and the HSPLITx unmask request allows to avoid the deadlock:\n\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\tThe same problem can be shown for three masters:\n\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\tThe following example shows that disallowing the simultaneous use of the HRESP=RETRY response and the HSPLITx unmask request allows to avoid the deadlock using three masters as well:\n\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\tThe final model that shows the correctness of our design:\n\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\tThe finite state machines were changed for the performance evaluation to provide better scalability. Therefore we need to verify that the resulting model is still correct:\n\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\tPerformance evaluation results using 64x64 pixel tiles for JPEG 2000 compression:\n\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\tPerformance evaluation results using 128x128 pixel tiles for JPEG 2000 compression:\n\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\tThe Alderis project focuses on the application of Domain Specific Modeling Languages (DSMLs) and meta-modeling to specify a common semantic domain for the analysis of distributed real-time embedded (DRE) systems.\n\t\t\t\t\t\t\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\t\tThe Alderis language has both a visual and textual syntax with formally defined semantics. Alderis models can be verified directly by the open-source Distributed Real-time Embedded Analysis Method (DREAM) tool available for download at http://dre.sourceforge.net.\n\t\t\t\t\t\t\n\t\t\t\t\t\t
\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\nComponent-based development is an emerging paradigm for the design of distributed real-time embedded (DRE) systems with hard QoS support. Components refer to reusable pieces of solutions, which can be configured and composed together to provide a service. Alderis plans to support this paradigm shift by providing a language and semantic domain for the model-driven development (MDD) of DRE systems.\n\t\t\t\t\t\t\n\t\t\t\t\t\t
\n\t\t\t\t\n\t\t\t\t\t\t\n\t\t\t\t\t\t\nThe Alderis language is specified using meta-modeling as shown in the figure below. We use the GME modeling environment to specify Alderis. The figure shows a part of the Alderis meta-model with its corresponding concrete syntax. The red arrows show how modeling elements and their relations are specified by the meta-model.\n\n\t\t\t\t\t\t
\n\n\n\t\t\t\t\t\t\nA key property of the Alderis language is that it has formally defined semantics allowing real-time verification using timed automata model checker tools such as UPPAAL and the Verimag IF toolset. The DREAM tool provides a way to automatically generate the timed automata models from the Alderis specification. DREAM also provides a way for random simulations that can be used efficiently to find bugs in designs that are too large and lead to state space explosions.\n\n\t\t\t\t\t\t
\n\n\t\t\t\t\n\n
\n\nRecent work of Paul Hilario
\n\n\nLos Banos, Laguna
\nPhilippines
\n\n
\n\n\nBUGAW (Shoo!)\n\n
\n\nThis painting is about at least two subjects. One subject is about the environment. Notice the slingshot on the boy's pocket? \nHe decided not to hurt or kill any of the birds.\nAlso, if you can feel the way I feel about the painting, don't you get the impression that they are rejoicing? \nThe two kid's raised arms are in celebration of a forthcoming bountiful rice harvest. The fields are heavily filled with golden rice.\n\n
\n\n\nSalat Nguni't Sapat (Not Enough but Enough)\n\n
\n\nMany children in rural areas in the Philippines walk miles just to get to school.
\nThey wear ragged clothes, worn out and broken footwear and have barely enough money for food and school supplies. \nSome classes have more than 40 students so everybody has to share tables and chairs and the room is cramped. \nBut surprisingly so they feel happy that they have the opportunity to go to school while many don't have that chance at all. \nThis is an example of a trait of Filipinos - to always look at the brighter side of things no matter how trying times can be.\n\n
\n
\n\n\n
\n
\nHernando Ombao, Ph.D. \n\n
\n\nVisiting Scholar
\nInstitute of Statistics
\nUniversity of the Philippines at Los Banos
\n\n
\nPRESENTATIONS\n
\n\n\n\nSELECTED PAPERS\n
\n\n\n\n
\n \nRESEARCH AREAS\n \nRESEARCH GROUPS \n\n\nHernando Ombao, Ph.D. \n\n
\n\nProfessor
\nDepartment of Statistics
\nUniversity of California at Irvine
\nBren Hall, Room 2206
\nIrvine, CA 92697 USA
\nPhone: (949) 824-5679
\nEmail: hombao AT uci DOT edu
\n\n\nCV\n\n\n
\n
\n\n\n\n
\n\n\n\n\n\n
\n\n\n\n\n
\n \n24 July to 26 July 2012 at San Diego, CA
\n\n \nCo-orgznized with M. Lindquist (Johns Hopkins) \nand W. Thompson (UCSD)
\n \n
\n\nI Support \nup-and-coming artists\n\n
\n\n\n\n", "encoding": "ascii"} \ No newline at end of file diff --git a/test/hombao_ics_uci_edu/6d0828ad3dfb8ba58be60b61adb2b9ae55cd9f5ec11a58d992bbc2377ad4d42e.json b/test/hombao_ics_uci_edu/6d0828ad3dfb8ba58be60b61adb2b9ae55cd9f5ec11a58d992bbc2377ad4d42e.json new file mode 100644 index 0000000..48c25bd --- /dev/null +++ b/test/hombao_ics_uci_edu/6d0828ad3dfb8ba58be60b61adb2b9ae55cd9f5ec11a58d992bbc2377ad4d42e.json @@ -0,0 +1 @@ +{"url": "https://hombao.ics.uci.edu/?s=research", "content": "\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\nUniversity of California at Irvine
\nIrvine, CA 92697
\nEMAIL: hombao AT uci DOT edu
\nPHONE: (949) 824-5679
\n\n\n\r\nH. KANG is the 2011 Winner of the John Van Ryzin Award.
\n\nM Fiecas, receipient of the 2010 Student Paper Award at New England Statistics Symposium at Cambridge, MA.
\n\nthe United States: Analysis of 30-year data from population-based cancer registries. International Journal of Cancer, 128, 175-1998.
\n\n\n
\n\nDeveloping Novel Statistical Methods in NeuroImaging \n\n
\n\nJuly 24-26, 2012 \n\n(immediately preceding the Joint Statistical Meeting 2012)\n\n
\n\nUniversity of California at San Diego\n\n
\nThe goal of the workshop is to identify open problems in statistical research that emerge from current challenges in neuroimaging. \n
\n\n\nThe analysis of brain data presents statistical challenges because of its massiveness, high dimensionality and complex spatio-temporal dependence structure. We expect to see open lines of statistical research especially in the areas of time series, spatial analysis, dimension reduction, statistical learning, functional data analysis, statistical computation and foundations of statistical inference. At the workshop, leaders in neuroimaging will deliver lectures on theoretical background in neuroscience and in the state-of-the-art statistical methods for the analysis of brain imaging data. The workshop topic is timely due to the increased role of late of mathematical and statistical methods in neuroimaging.\n
\n
\n\n
\n\n
FEES\n\n
\n\n
\n \nREGISTRATION (web-based)\n
\n\nLimited seats only. The deadline for application is extended to 15 May 2012. \nNote that registration details and scholarship applications done at CHECKOUT.\n\n
\n\n
SCHOLARSHIPS \n\n
Note: Application for scholarships is closed \neffective 26 April 2012.\n \n
\n\n\nWe anticipate financial support from the National Science Foundation (DMS). Partial scholarships will be available to junior scholars (PhD students and recent PhDs). There will be travel support (up to $200)\nand workshop scholarship (up to $500).\n
\n
\nIn the registration, indicate if you are applying for (1) travel support; and/or (2) partial workshop scholarship\n
\n
\nApplicants for travel support and/or housing will be notified of the decision by late May 2012.\n
\n
\nMinorities and women are especially encouraged to apply. \n
\n\n
\nWORKSHOP INSTRUCTORS\n \n
\n\nGreg Brown (UC San Diego), Richard Buxton (UCSD), Anders Dale (UCSD), Mark Fiecas (UCSD), Martin Lindquist (Columbia University), Tom Liu (UCSD), \nTim Mullen (UCSD), Hernando Ombao (UC Irvine), \nWesley Thompson (UCSD)\n\n
\n\n
\n\n
\nTENTATIVE PROGRAM\n \n
\n
\n\nJuly 24 \n\n
\nJuly 25\n\n
\n\n
\n\n
\n\n
\nJuly 26\n\n
\n\n
\n\n
\n\n\n
\n\n
\n
\n\n
\nORGANIZING COMMITTEE\n \n
\n\nMartin Lindquist (Columbia University)
\n Hernando Ombao (Univ California at Irvine)
\nWesley Thompson (Univ California at San Diego)
\n\n
\n\n
\nINQUIRIES\n\n\n
\nSend to stats-neuro@ics.uci.edu\n\n\n
\n\n
\nACKNOWLEDGEMENT\n\n\n
\nThis workshop is mainly supported by the US National Science Foundation (Division of Mathematical Sciences). We also acknowlege support by the UC Irvine Bren School of Information Sciences.\n\n\n
Professor Ihler is the advisor of AI@UCI.
\r\n\t\t\t\t\t\t\tHere are some areas that he has been working on.
\"I work in artificial intelligence and machine learning, focusing on statistical methods for learning from data and on approximate inference techniques for graphical models. Applications of my work include data mining and information fusion in sensor networks, computer vision and image processing, and computational biology.\"